maxframe 0.1.0b4__cp310-cp310-macosx_10_9_universal2.whl → 0.1.0b5__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/codegen.py +46 -1
- maxframe/config/config.py +11 -1
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/dataframe/__init__.py +1 -0
- maxframe/dataframe/core.py +30 -8
- maxframe/dataframe/datasource/read_odps_query.py +3 -1
- maxframe/dataframe/datasource/read_odps_table.py +3 -1
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +1 -1
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +61 -0
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +2 -1
- maxframe/dataframe/utils.py +7 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +86 -0
- maxframe/learn/contrib/xgboost/core.py +156 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
- maxframe/learn/contrib/xgboost/predict.py +138 -0
- maxframe/learn/contrib/xgboost/regressor.py +78 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +121 -0
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/odpsio/arrow.py +2 -3
- maxframe/odpsio/tableio.py +22 -0
- maxframe/odpsio/tests/test_schema.py +16 -11
- maxframe/opcodes.py +3 -0
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/core.pyi +61 -0
- maxframe/session.py +28 -0
- maxframe/tensor/__init__.py +1 -1
- maxframe/tensor/base/__init__.py +2 -0
- maxframe/tensor/base/atleast_1d.py +74 -0
- maxframe/tensor/base/unique.py +205 -0
- maxframe/tensor/datasource/array.py +4 -2
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/udf.py +63 -3
- maxframe/utils.py +6 -0
- {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/METADATA +2 -2
- {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/RECORD +53 -36
- maxframe_client/fetcher.py +65 -3
- maxframe_client/session/odps.py +30 -1
- maxframe_client/session/task.py +26 -53
- maxframe_client/tests/test_session.py +28 -1
- {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/WHEEL +0 -0
- {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/top_level.txt +0 -0
maxframe_client/session/task.py
CHANGED
|
@@ -21,9 +21,8 @@ from typing import Dict, List, Optional, Type, Union
|
|
|
21
21
|
import msgpack
|
|
22
22
|
from odps import ODPS
|
|
23
23
|
from odps import options as odps_options
|
|
24
|
-
from odps import serializers
|
|
25
24
|
from odps.errors import parse_instance_error
|
|
26
|
-
from odps.models import Instance, Task
|
|
25
|
+
from odps.models import Instance, MaxFrameTask
|
|
27
26
|
|
|
28
27
|
from maxframe.config import options
|
|
29
28
|
from maxframe.core import TileableGraph
|
|
@@ -55,55 +54,6 @@ from .odps import MaxFrameServiceCaller, MaxFrameSession
|
|
|
55
54
|
logger = logging.getLogger(__name__)
|
|
56
55
|
|
|
57
56
|
|
|
58
|
-
class MaxFrameTask(Task):
|
|
59
|
-
__slots__ = ("_output_format", "_major_version", "_service_endpoint")
|
|
60
|
-
_root = "MaxFrame"
|
|
61
|
-
_anonymous_task_name = "AnonymousMaxFrameTask"
|
|
62
|
-
|
|
63
|
-
command = serializers.XMLNodeField("Command", default="CREATE_SESSION")
|
|
64
|
-
|
|
65
|
-
def __init__(self, **kwargs):
|
|
66
|
-
kwargs["name"] = kwargs.get("name") or self._anonymous_task_name
|
|
67
|
-
self._output_format = kwargs.pop(
|
|
68
|
-
"output_format", MAXFRAME_OUTPUT_MSGPACK_FORMAT
|
|
69
|
-
)
|
|
70
|
-
self._major_version = kwargs.pop("major_version", None)
|
|
71
|
-
self._service_endpoint = kwargs.pop("service_endpoint", None)
|
|
72
|
-
super().__init__(**kwargs)
|
|
73
|
-
|
|
74
|
-
def serial(self):
|
|
75
|
-
if self.properties is None:
|
|
76
|
-
self.properties = dict()
|
|
77
|
-
|
|
78
|
-
if odps_options.default_task_settings:
|
|
79
|
-
settings = odps_options.default_task_settings
|
|
80
|
-
else:
|
|
81
|
-
settings = dict()
|
|
82
|
-
|
|
83
|
-
if self._major_version is not None:
|
|
84
|
-
settings["odps.task.major.version"] = self._major_version
|
|
85
|
-
|
|
86
|
-
if "settings" in self.properties:
|
|
87
|
-
settings.update(json.loads(self.properties["settings"]))
|
|
88
|
-
|
|
89
|
-
# merge sql options
|
|
90
|
-
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
91
|
-
sql_settings.update(options.sql.settings or {})
|
|
92
|
-
|
|
93
|
-
mf_settings = dict(options.to_dict(remote_only=True).items())
|
|
94
|
-
mf_settings["sql.settings"] = sql_settings
|
|
95
|
-
mf_opts = {
|
|
96
|
-
"odps.maxframe.settings": json.dumps(mf_settings),
|
|
97
|
-
"odps.maxframe.output_format": self._output_format,
|
|
98
|
-
"odps.service.endpoint": self._service_endpoint,
|
|
99
|
-
}
|
|
100
|
-
if mf_version:
|
|
101
|
-
mf_opts["odps.maxframe.client_version"] = mf_version
|
|
102
|
-
settings.update(mf_opts)
|
|
103
|
-
self.properties["settings"] = json.dumps(settings)
|
|
104
|
-
return super().serial()
|
|
105
|
-
|
|
106
|
-
|
|
107
57
|
class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
108
58
|
_instance: Optional[Instance]
|
|
109
59
|
|
|
@@ -159,13 +109,31 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
159
109
|
f"Serialization format {self._output_format} not supported"
|
|
160
110
|
)
|
|
161
111
|
|
|
162
|
-
def create_session(self) -> SessionInfo:
|
|
112
|
+
def _create_maxframe_task(self) -> MaxFrameTask:
|
|
163
113
|
task = MaxFrameTask(
|
|
164
114
|
name=self._task_name,
|
|
165
115
|
major_version=self._major_version,
|
|
166
|
-
output_format=self._output_format,
|
|
167
116
|
service_endpoint=self._odps_entry.endpoint,
|
|
168
117
|
)
|
|
118
|
+
|
|
119
|
+
# merge sql options
|
|
120
|
+
sql_settings = (odps_options.sql.settings or {}).copy()
|
|
121
|
+
sql_settings.update(options.sql.settings or {})
|
|
122
|
+
|
|
123
|
+
mf_settings = dict(options.to_dict(remote_only=True).items())
|
|
124
|
+
mf_settings["sql.settings"] = sql_settings
|
|
125
|
+
|
|
126
|
+
mf_opts = {
|
|
127
|
+
"odps.maxframe.settings": json.dumps(mf_settings),
|
|
128
|
+
"odps.maxframe.output_format": self._output_format,
|
|
129
|
+
}
|
|
130
|
+
if mf_version:
|
|
131
|
+
mf_opts["odps.maxframe.client_version"] = mf_version
|
|
132
|
+
task.update_settings(mf_opts)
|
|
133
|
+
return task
|
|
134
|
+
|
|
135
|
+
def create_session(self) -> SessionInfo:
|
|
136
|
+
task = self._create_maxframe_task()
|
|
169
137
|
if not self._nested:
|
|
170
138
|
self._task_name = task.name
|
|
171
139
|
project = self._odps_entry.get_project(self._project)
|
|
@@ -278,6 +246,11 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
|
|
|
278
246
|
self._task_name, MAXFRAME_TASK_DECREF_METHOD, json.dumps(req_data)
|
|
279
247
|
)
|
|
280
248
|
|
|
249
|
+
def get_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
|
|
250
|
+
hours = hours or options.session.logview_hours
|
|
251
|
+
subquery_suffix = f"&subQuery={dag_id}" if dag_id else ""
|
|
252
|
+
return self._instance.get_logview_address(hours) + subquery_suffix
|
|
253
|
+
|
|
281
254
|
|
|
282
255
|
class MaxFrameTaskSession(MaxFrameSession):
|
|
283
256
|
schemes = [ODPS_SESSION_INSECURE_SCHEME, ODPS_SESSION_SECURE_SCHEME]
|
|
maxframe_client/tests/test_session.py
CHANGED
|
@@ -99,9 +99,12 @@ def test_simple_run_dataframe(start_mock_session):
|
|
|
99
99
|
corner_top, corner_bottom = ExecutableTuple([df.iloc[:10], df.iloc[-10:]]).fetch()
|
|
100
100
|
assert len(corner_top) == len(corner_bottom) == 10
|
|
101
101
|
|
|
102
|
-
# check ellipsis mark in DataFrame
|
|
102
|
+
# check ellipsis mark in DataFrame reprs
|
|
103
103
|
df_str_repr = str(df)
|
|
104
104
|
assert ".." in df_str_repr
|
|
105
|
+
# check ellipsis mark in Series reprs
|
|
106
|
+
series_str_repr = str(df.A.execute())
|
|
107
|
+
assert ".." in series_str_repr
|
|
105
108
|
|
|
106
109
|
key = df.key
|
|
107
110
|
assert odps_entry.exist_table(
|
|
@@ -226,3 +229,27 @@ def test_run_remote_error(start_mock_session):
|
|
|
226
229
|
|
|
227
230
|
with pytest.raises((ValueError, RemoteException)):
|
|
228
231
|
v.execute()
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def test_pivot_dataframe(start_mock_session):
|
|
235
|
+
pd_df = pd.DataFrame(
|
|
236
|
+
{
|
|
237
|
+
"A": "foo foo foo foo foo bar bar bar bar".split(),
|
|
238
|
+
"B": "one one one two two one one two two".split(),
|
|
239
|
+
"C": "small large large small small large small small large".split(),
|
|
240
|
+
"D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
|
|
241
|
+
"E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
|
|
242
|
+
}
|
|
243
|
+
)
|
|
244
|
+
df = md.DataFrame(pd_df)
|
|
245
|
+
pivot = df.pivot_table(values="D", index=["A", "B"], columns=["C"], aggfunc="sum")
|
|
246
|
+
executed = pivot.execute()
|
|
247
|
+
assert pivot.shape == (2, 4)
|
|
248
|
+
pd.testing.assert_index_equal(
|
|
249
|
+
pivot.dtypes.index, pd.Index(["large", "small"], name="C")
|
|
250
|
+
)
|
|
251
|
+
|
|
252
|
+
expected = pd_df.pivot_table(
|
|
253
|
+
values="D", index=["A", "B"], columns=["C"], aggfunc="sum"
|
|
254
|
+
)
|
|
255
|
+
pd.testing.assert_frame_equal(executed.to_pandas(), expected)
|
|
File without changes
|
|
File without changes
|