maxframe 0.1.0b5__cp38-cp38-win_amd64.whl → 1.0.0__cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp38-win_amd64.pyd +0 -0
- maxframe/codegen.py +10 -4
- maxframe/config/config.py +68 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +31 -7
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +117 -23
- maxframe/dataframe/datasource/read_odps_table.py +6 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/apply.py +5 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/tests/test_misc.py +33 -2
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +26 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
- maxframe/learn/contrib/xgboost/predict.py +29 -46
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +29 -18
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +8 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/{base → misc}/unique.py +3 -3
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +106 -86
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +81 -74
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +194 -40
- maxframe_client/session/task.py +94 -39
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +109 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -322
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
|
maxframe_client/tests/test_fetcher.py
CHANGED
|
@@ -17,19 +17,32 @@ import uuid
|
|
|
17
17
|
import numpy as np
|
|
18
18
|
import pandas as pd
|
|
19
19
|
import pyarrow as pa
|
|
20
|
+
import pytest
|
|
20
21
|
from odps import ODPS
|
|
21
22
|
|
|
22
23
|
import maxframe.dataframe as md
|
|
23
|
-
from maxframe.odpsio import HaloTableIO
|
|
24
|
+
from maxframe.config import options
|
|
25
|
+
from maxframe.io.odpsio import ODPSTableIO
|
|
24
26
|
from maxframe.protocol import ODPSTableResultInfo, ResultType
|
|
25
27
|
from maxframe.tests.utils import tn
|
|
26
28
|
|
|
27
29
|
from ..fetcher import ODPSTableFetcher
|
|
28
30
|
|
|
29
31
|
|
|
30
|
-
|
|
32
|
+
@pytest.fixture
|
|
33
|
+
def switch_table_io(request):
|
|
34
|
+
old_use_common_table = options.use_common_table
|
|
35
|
+
try:
|
|
36
|
+
options.use_common_table = request.param
|
|
37
|
+
yield
|
|
38
|
+
finally:
|
|
39
|
+
options.use_common_table = old_use_common_table
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@pytest.mark.parametrize("switch_table_io", [False, True], indirect=True)
|
|
43
|
+
async def test_table_fetcher(switch_table_io):
|
|
31
44
|
odps_entry = ODPS.from_environments()
|
|
32
|
-
halo_table_io = HaloTableIO(odps_entry)
|
|
45
|
+
halo_table_io = ODPSTableIO(odps_entry)
|
|
33
46
|
fetcher = ODPSTableFetcher(odps_entry)
|
|
34
47
|
|
|
35
48
|
data = pd.DataFrame(
|
|
@@ -58,6 +71,11 @@ async def test_table_fetcher():
|
|
|
58
71
|
assert len(fetched) == 1000
|
|
59
72
|
pd.testing.assert_frame_equal(raw_data, fetched)
|
|
60
73
|
|
|
74
|
+
result_info = ODPSTableResultInfo(ResultType.ODPS_TABLE, full_table_name=table_name)
|
|
75
|
+
fetched = await fetcher.fetch(tileable, result_info, [slice(None, 2000), None])
|
|
76
|
+
assert len(fetched) == 1000
|
|
77
|
+
pd.testing.assert_frame_equal(raw_data, fetched)
|
|
78
|
+
|
|
61
79
|
result_info = ODPSTableResultInfo(ResultType.ODPS_TABLE, full_table_name=table_name)
|
|
62
80
|
fetched = await fetcher.fetch(tileable, result_info, [2, None])
|
|
63
81
|
assert len(fetched) == 1
|
|
maxframe_client/tests/test_session.py
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import time
|
|
16
|
-
from typing import Dict
|
|
16
|
+
from typing import Any, Dict
|
|
17
17
|
|
|
18
18
|
import mock
|
|
19
19
|
import numpy as np
|
|
@@ -23,7 +23,10 @@ from odps import ODPS
|
|
|
23
23
|
|
|
24
24
|
import maxframe.dataframe as md
|
|
25
25
|
import maxframe.remote as mr
|
|
26
|
+
from maxframe.config import options
|
|
27
|
+
from maxframe.config.config import option_context
|
|
26
28
|
from maxframe.core import ExecutableTuple, TileableGraph
|
|
29
|
+
from maxframe.errors import NoTaskServerResponseError
|
|
27
30
|
from maxframe.lib.aio import stop_isolation
|
|
28
31
|
from maxframe.protocol import ResultInfo
|
|
29
32
|
from maxframe.serialization import RemoteException
|
|
@@ -35,6 +38,7 @@ from maxframe_framedriver.app.tests.test_framedriver_webapp import ( # noqa: F4
|
|
|
35
38
|
)
|
|
36
39
|
|
|
37
40
|
from ..clients.framedriver import FrameDriverClient
|
|
41
|
+
from ..session.odps import MaxFrameRestCaller
|
|
38
42
|
|
|
39
43
|
pytestmark = pytest.mark.maxframe_engine(["MCSQL", "SPE"])
|
|
40
44
|
|
|
@@ -82,15 +86,32 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
82
86
|
session_id: str,
|
|
83
87
|
dag: TileableGraph,
|
|
84
88
|
managed_input_infos: Dict[str, ResultInfo] = None,
|
|
89
|
+
new_settings: Dict[str, Any] = None,
|
|
85
90
|
):
|
|
86
91
|
assert len(dag) == 2
|
|
87
|
-
return await original_submit_dag(self, session_id, dag, managed_input_infos)
|
|
92
|
+
return await original_submit_dag(
|
|
93
|
+
self, session_id, dag, managed_input_infos, new_settings
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
no_task_server_raised = False
|
|
97
|
+
original_get_dag_info = MaxFrameRestCaller.get_dag_info
|
|
98
|
+
|
|
99
|
+
async def patched_get_dag_info(self, dag_id: str):
|
|
100
|
+
nonlocal no_task_server_raised
|
|
101
|
+
|
|
102
|
+
if not no_task_server_raised:
|
|
103
|
+
no_task_server_raised = True
|
|
104
|
+
raise NoTaskServerResponseError
|
|
105
|
+
return await original_get_dag_info(self, dag_id)
|
|
88
106
|
|
|
89
107
|
df["H"] = "extra_content"
|
|
90
108
|
|
|
91
109
|
with mock.patch(
|
|
92
110
|
"maxframe_client.clients.framedriver.FrameDriverClient.submit_dag",
|
|
93
111
|
new=patched_submit_dag,
|
|
112
|
+
), mock.patch(
|
|
113
|
+
"maxframe_client.session.odps.MaxFrameRestCaller.get_dag_info",
|
|
114
|
+
new=patched_get_dag_info,
|
|
94
115
|
):
|
|
95
116
|
result = df.execute().fetch()
|
|
96
117
|
assert len(result) == 1000
|
|
@@ -112,13 +133,30 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
112
133
|
)
|
|
113
134
|
assert odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
|
|
114
135
|
del df
|
|
115
|
-
|
|
136
|
+
retry_times = 10
|
|
137
|
+
while (
|
|
138
|
+
odps_entry.exist_table(
|
|
139
|
+
build_temp_table_name(start_mock_session, intermediate_key)
|
|
140
|
+
)
|
|
141
|
+
and retry_times > 0
|
|
142
|
+
):
|
|
143
|
+
time.sleep(1)
|
|
144
|
+
retry_times -= 1
|
|
116
145
|
assert not odps_entry.exist_table(
|
|
117
146
|
build_temp_table_name(start_mock_session, intermediate_key)
|
|
118
147
|
)
|
|
119
148
|
assert not odps_entry.exist_table(build_temp_table_name(start_mock_session, key))
|
|
120
149
|
|
|
121
150
|
|
|
151
|
+
def test_run_and_fetch_slice(start_mock_session):
|
|
152
|
+
pd_df = pd.DataFrame(np.random.rand(1000, 5), columns=list("ABCDE"))
|
|
153
|
+
df = md.DataFrame(pd_df)
|
|
154
|
+
result = df.execute()
|
|
155
|
+
|
|
156
|
+
sliced = result.head(10).fetch()
|
|
157
|
+
assert len(sliced) == 10
|
|
158
|
+
|
|
159
|
+
|
|
122
160
|
def test_run_empty_table(start_mock_session):
|
|
123
161
|
odps_entry = ODPS.from_environments()
|
|
124
162
|
|
|
@@ -139,6 +177,25 @@ def test_run_empty_table(start_mock_session):
|
|
|
139
177
|
empty_table.drop()
|
|
140
178
|
|
|
141
179
|
|
|
180
|
+
def test_run_odps_query_without_schema(start_mock_session):
|
|
181
|
+
odps_entry = ODPS.from_environments()
|
|
182
|
+
|
|
183
|
+
table_name = tn("test_session_empty_table")
|
|
184
|
+
odps_entry.delete_table(table_name, if_exists=True)
|
|
185
|
+
test_table = odps_entry.create_table(table_name, "a double, b double", lifecycle=1)
|
|
186
|
+
|
|
187
|
+
with test_table.open_writer() as writer:
|
|
188
|
+
writer.write([123, 456])
|
|
189
|
+
|
|
190
|
+
df = md.read_odps_query(
|
|
191
|
+
f"select a, b, a + b as `special: name` from {table_name}", skip_schema=True
|
|
192
|
+
)
|
|
193
|
+
executed = df.execute().fetch()
|
|
194
|
+
assert len(executed.dtypes) == 3
|
|
195
|
+
|
|
196
|
+
test_table.drop()
|
|
197
|
+
|
|
198
|
+
|
|
142
199
|
def test_run_dataframe_with_pd_source(start_mock_session):
|
|
143
200
|
odps_entry = ODPS.from_environments()
|
|
144
201
|
|
|
@@ -171,19 +228,38 @@ def test_run_dataframe_from_to_odps_table(start_mock_session):
|
|
|
171
228
|
table_name = build_temp_table_name(start_mock_session, "tmp_save")
|
|
172
229
|
table_obj = odps_entry.get_table(table_name)
|
|
173
230
|
try:
|
|
174
|
-
md.to_odps_table(md.DataFrame(pd_df), table_obj).execute().fetch()
|
|
231
|
+
md.to_odps_table(md.DataFrame(pd_df), table_obj, lifecycle=1).execute().fetch()
|
|
175
232
|
with table_obj.open_reader() as reader:
|
|
176
233
|
result_df = reader.to_pandas()
|
|
177
234
|
assert len(result_df) == 10
|
|
178
235
|
assert len(result_df.columns) == 6
|
|
179
236
|
|
|
180
|
-
df = md.read_odps_table(table_obj, index_col="index").head(10).execute()
|
|
237
|
+
df = md.read_odps_table(table_obj, index_col="index").head(10).execute()
|
|
238
|
+
assert df.shape == (10, 5)
|
|
181
239
|
assert len(df) == 10
|
|
182
240
|
assert len(df.columns) == 5
|
|
183
241
|
finally:
|
|
184
242
|
odps_entry.delete_table(table_name, if_exists=True)
|
|
185
243
|
|
|
186
244
|
|
|
245
|
+
def test_create_session_with_options(framedriver_app): # noqa: F811
|
|
246
|
+
odps_entry = ODPS.from_environments()
|
|
247
|
+
framedriver_addr = f"mf://localhost:{framedriver_app.port}"
|
|
248
|
+
old_value = options.session.max_alive_seconds
|
|
249
|
+
session = None
|
|
250
|
+
try:
|
|
251
|
+
options.session.max_alive_seconds = 10
|
|
252
|
+
session = new_session(framedriver_addr, odps_entry=odps_entry)
|
|
253
|
+
session_id = session.session_id
|
|
254
|
+
session_conf = framedriver_app.session_manager.get_session_settings(session_id)
|
|
255
|
+
with option_context(session_conf) as session_options:
|
|
256
|
+
assert session_options.session.max_alive_seconds == 10
|
|
257
|
+
finally:
|
|
258
|
+
options.session.max_alive_seconds = old_value
|
|
259
|
+
if session is not None:
|
|
260
|
+
session.destroy()
|
|
261
|
+
|
|
262
|
+
|
|
187
263
|
def test_run_and_fetch_series(start_mock_session):
|
|
188
264
|
odps_entry = ODPS.from_environments()
|
|
189
265
|
|
|
@@ -210,7 +286,22 @@ def test_run_and_fetch_series(start_mock_session):
|
|
|
210
286
|
)
|
|
211
287
|
|
|
212
288
|
|
|
213
|
-
def test_run_remote_success(start_mock_session):
|
|
289
|
+
def test_execute_with_tensor(oss_config, start_mock_session):
|
|
290
|
+
pd_df = pd.DataFrame(
|
|
291
|
+
{"angles": [0, 3, 4], "degrees": [360, 180, 360]},
|
|
292
|
+
index=["circle", "triangle", "rectangle"],
|
|
293
|
+
)
|
|
294
|
+
df = md.DataFrame(pd_df)
|
|
295
|
+
|
|
296
|
+
result = (df - [1, 2]).execute().fetch()
|
|
297
|
+
expected = pd_df - [1, 2]
|
|
298
|
+
# TODO: currently the record order in tensor reading from table is the index
|
|
299
|
+
# sorting order
|
|
300
|
+
expected.sort_index(axis=0, inplace=True)
|
|
301
|
+
pd.testing.assert_frame_equal(result, expected, check_like=True)
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def test_run_remote_success(oss_config, start_mock_session):
|
|
214
305
|
def func(a, b):
|
|
215
306
|
return a + b
|
|
216
307
|
|
|
@@ -221,7 +312,7 @@ def test_run_remote_success(start_mock_session):
|
|
|
221
312
|
assert result == 21
|
|
222
313
|
|
|
223
314
|
|
|
224
|
-
def test_run_remote_error(start_mock_session):
|
|
315
|
+
def test_run_remote_error(oss_config, start_mock_session):
|
|
225
316
|
def func():
|
|
226
317
|
raise ValueError
|
|
227
318
|
|
|
@@ -244,7 +335,7 @@ def test_pivot_dataframe(start_mock_session):
|
|
|
244
335
|
df = md.DataFrame(pd_df)
|
|
245
336
|
pivot = df.pivot_table(values="D", index=["A", "B"], columns=["C"], aggfunc="sum")
|
|
246
337
|
executed = pivot.execute()
|
|
247
|
-
assert pivot.shape == (
|
|
338
|
+
assert pivot.shape == (4, 2)
|
|
248
339
|
pd.testing.assert_index_equal(
|
|
249
340
|
pivot.dtypes.index, pd.Index(["large", "small"], name="C")
|
|
250
341
|
)
|
|
@@ -253,3 +344,13 @@ def test_pivot_dataframe(start_mock_session):
|
|
|
253
344
|
values="D", index=["A", "B"], columns=["C"], aggfunc="sum"
|
|
254
345
|
)
|
|
255
346
|
pd.testing.assert_frame_equal(executed.to_pandas(), expected)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def test_index_drop_duplicates(start_mock_session):
|
|
350
|
+
pd_idx = pd.Index(["lame", "cow", "lame", "beetle", "lame", "hippo"])
|
|
351
|
+
idx = md.Index(pd_idx)
|
|
352
|
+
executed = idx.drop_duplicates(keep="first").execute()
|
|
353
|
+
expected = pd_idx.drop_duplicates(keep="first")
|
|
354
|
+
pd.testing.assert_index_equal(
|
|
355
|
+
executed.to_pandas().sort_values(), expected.sort_values()
|
|
356
|
+
)
|
maxframe/core/entity/chunks.py
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
from ...serialization.serializables import BoolField, FieldTypes, TupleField
|
|
16
|
-
from ...utils import tokenize
|
|
17
|
-
from .core import Entity, EntityData
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class ChunkData(EntityData):
|
|
21
|
-
__slots__ = ()
|
|
22
|
-
|
|
23
|
-
is_broadcaster = BoolField("is_broadcaster", default=False)
|
|
24
|
-
# If the operator is a shuffle mapper, this flag indicates whether the current chunk is mapper chunk when
|
|
25
|
-
# the operator produce multiple chunks such as TensorUnique.
|
|
26
|
-
is_mapper = BoolField("is_mapper", default=None)
|
|
27
|
-
# optional fields
|
|
28
|
-
_index = TupleField("index", FieldTypes.uint32)
|
|
29
|
-
|
|
30
|
-
def __repr__(self):
|
|
31
|
-
if self.op.stage is None:
|
|
32
|
-
return (
|
|
33
|
-
f"{type(self).__name__} <op={type(self.op).__name__}, "
|
|
34
|
-
f"key={self.key}>"
|
|
35
|
-
)
|
|
36
|
-
else:
|
|
37
|
-
return (
|
|
38
|
-
f"{type(self).__name__} <op={type(self.op).__name__}, "
|
|
39
|
-
f"stage={self.op.stage.name}, key={self.key}>"
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
@property
|
|
43
|
-
def index(self):
|
|
44
|
-
return getattr(self, "_index", None)
|
|
45
|
-
|
|
46
|
-
@property
|
|
47
|
-
def device(self):
|
|
48
|
-
return self.op.device
|
|
49
|
-
|
|
50
|
-
def _update_key(self):
|
|
51
|
-
object.__setattr__(
|
|
52
|
-
self,
|
|
53
|
-
"_key",
|
|
54
|
-
tokenize(
|
|
55
|
-
type(self).__name__,
|
|
56
|
-
*(getattr(self, k, None) for k in self._keys_ if k != "_index"),
|
|
57
|
-
),
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
class Chunk(Entity):
|
|
62
|
-
_allow_data_type_ = (ChunkData,)
|
|
63
|
-
|
|
64
|
-
def __repr__(self):
|
|
65
|
-
return f"{type(self).__name__}({self._data.__repr__()})"
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
CHUNK_TYPE = (Chunk, ChunkData)
|
maxframe/core/entity/fuse.py
DELETED
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
|
|
15
|
-
import numpy as np
|
|
16
|
-
|
|
17
|
-
from ...serialization.serializables import ReferenceField
|
|
18
|
-
from .chunks import CHUNK_TYPE, Chunk, ChunkData
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class FuseChunkData(ChunkData):
|
|
22
|
-
__slots__ = ("_inited",)
|
|
23
|
-
|
|
24
|
-
_chunk = ReferenceField(
|
|
25
|
-
"chunk", CHUNK_TYPE, on_serialize=lambda x: x.data if hasattr(x, "data") else x
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
def __init__(self, *args, **kwargs):
|
|
29
|
-
self._inited = False
|
|
30
|
-
super().__init__(*args, **kwargs)
|
|
31
|
-
self._extra_params = {}
|
|
32
|
-
self._inited = True
|
|
33
|
-
|
|
34
|
-
@property
|
|
35
|
-
def chunk(self):
|
|
36
|
-
return self._chunk
|
|
37
|
-
|
|
38
|
-
@property
|
|
39
|
-
def composed(self):
|
|
40
|
-
# for compatibility, just return the topological ordering,
|
|
41
|
-
# once we apply optimization on the subgraph,
|
|
42
|
-
# `composed` is not needed any more and should be removed then.
|
|
43
|
-
assert getattr(self._op, "fuse_graph", None) is not None
|
|
44
|
-
fuse_graph = self._op.fuse_graph
|
|
45
|
-
return list(fuse_graph.topological_iter())
|
|
46
|
-
|
|
47
|
-
def __getattr__(self, attr):
|
|
48
|
-
if not self._inited:
|
|
49
|
-
return object.__getattribute__(self, attr)
|
|
50
|
-
if attr in self._extra_params:
|
|
51
|
-
return self._extra_params[attr]
|
|
52
|
-
try:
|
|
53
|
-
return getattr(self._chunk, attr)
|
|
54
|
-
except AttributeError:
|
|
55
|
-
return object.__getattribute__(self, attr)
|
|
56
|
-
|
|
57
|
-
def __setattr__(self, attr, value):
|
|
58
|
-
if attr == "params":
|
|
59
|
-
self._chunk.params = value
|
|
60
|
-
else:
|
|
61
|
-
super().__setattr__(attr, value)
|
|
62
|
-
|
|
63
|
-
@property
|
|
64
|
-
def nbytes(self):
|
|
65
|
-
return np.prod(self.shape) * self.dtype.itemsize
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
class FuseChunk(Chunk):
|
|
69
|
-
__slots__ = ()
|
|
70
|
-
_allow_data_type_ = (FuseChunkData,)
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
FUSE_CHUNK_TYPE = (FuseChunkData, FuseChunk)
|