maxframe 0.1.0b4__cp311-cp311-win_amd64.whl → 0.1.0b5__cp311-cp311-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (53)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp311-win_amd64.pyd +0 -0
  3. maxframe/codegen.py +46 -1
  4. maxframe/config/config.py +11 -1
  5. maxframe/core/graph/core.cp311-win_amd64.pyd +0 -0
  6. maxframe/dataframe/__init__.py +1 -0
  7. maxframe/dataframe/core.py +30 -8
  8. maxframe/dataframe/datasource/read_odps_query.py +3 -1
  9. maxframe/dataframe/datasource/read_odps_table.py +3 -1
  10. maxframe/dataframe/misc/__init__.py +4 -0
  11. maxframe/dataframe/misc/apply.py +1 -1
  12. maxframe/dataframe/misc/case_when.py +141 -0
  13. maxframe/dataframe/misc/pivot_table.py +262 -0
  14. maxframe/dataframe/misc/tests/test_misc.py +61 -0
  15. maxframe/dataframe/plotting/core.py +2 -2
  16. maxframe/dataframe/reduction/core.py +2 -1
  17. maxframe/dataframe/utils.py +7 -0
  18. maxframe/learn/contrib/utils.py +52 -0
  19. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  20. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  21. maxframe/learn/contrib/xgboost/core.py +156 -0
  22. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  23. maxframe/learn/contrib/xgboost/predict.py +138 -0
  24. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  25. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  26. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  27. maxframe/learn/contrib/xgboost/train.py +121 -0
  28. maxframe/learn/utils/__init__.py +15 -0
  29. maxframe/learn/utils/core.py +29 -0
  30. maxframe/lib/mmh3.cp311-win_amd64.pyd +0 -0
  31. maxframe/odpsio/arrow.py +2 -3
  32. maxframe/odpsio/tableio.py +22 -0
  33. maxframe/odpsio/tests/test_schema.py +16 -11
  34. maxframe/opcodes.py +3 -0
  35. maxframe/serialization/core.cp311-win_amd64.pyd +0 -0
  36. maxframe/serialization/core.pyi +61 -0
  37. maxframe/session.py +28 -0
  38. maxframe/tensor/__init__.py +1 -1
  39. maxframe/tensor/base/__init__.py +2 -0
  40. maxframe/tensor/base/atleast_1d.py +74 -0
  41. maxframe/tensor/base/unique.py +205 -0
  42. maxframe/tensor/datasource/array.py +4 -2
  43. maxframe/tensor/datasource/scalar.py +1 -1
  44. maxframe/udf.py +63 -3
  45. maxframe/utils.py +6 -0
  46. {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/METADATA +2 -2
  47. {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/RECORD +53 -36
  48. maxframe_client/fetcher.py +65 -3
  49. maxframe_client/session/odps.py +30 -1
  50. maxframe_client/session/task.py +26 -53
  51. maxframe_client/tests/test_session.py +28 -1
  52. {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/WHEEL +0 -0
  53. {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/top_level.txt +0 -0
@@ -21,9 +21,8 @@ from typing import Dict, List, Optional, Type, Union
21
21
  import msgpack
22
22
  from odps import ODPS
23
23
  from odps import options as odps_options
24
- from odps import serializers
25
24
  from odps.errors import parse_instance_error
26
- from odps.models import Instance, Task
25
+ from odps.models import Instance, MaxFrameTask
27
26
 
28
27
  from maxframe.config import options
29
28
  from maxframe.core import TileableGraph
@@ -55,55 +54,6 @@ from .odps import MaxFrameServiceCaller, MaxFrameSession
55
54
  logger = logging.getLogger(__name__)
56
55
 
57
56
 
58
- class MaxFrameTask(Task):
59
- __slots__ = ("_output_format", "_major_version", "_service_endpoint")
60
- _root = "MaxFrame"
61
- _anonymous_task_name = "AnonymousMaxFrameTask"
62
-
63
- command = serializers.XMLNodeField("Command", default="CREATE_SESSION")
64
-
65
- def __init__(self, **kwargs):
66
- kwargs["name"] = kwargs.get("name") or self._anonymous_task_name
67
- self._output_format = kwargs.pop(
68
- "output_format", MAXFRAME_OUTPUT_MSGPACK_FORMAT
69
- )
70
- self._major_version = kwargs.pop("major_version", None)
71
- self._service_endpoint = kwargs.pop("service_endpoint", None)
72
- super().__init__(**kwargs)
73
-
74
- def serial(self):
75
- if self.properties is None:
76
- self.properties = dict()
77
-
78
- if odps_options.default_task_settings:
79
- settings = odps_options.default_task_settings
80
- else:
81
- settings = dict()
82
-
83
- if self._major_version is not None:
84
- settings["odps.task.major.version"] = self._major_version
85
-
86
- if "settings" in self.properties:
87
- settings.update(json.loads(self.properties["settings"]))
88
-
89
- # merge sql options
90
- sql_settings = (odps_options.sql.settings or {}).copy()
91
- sql_settings.update(options.sql.settings or {})
92
-
93
- mf_settings = dict(options.to_dict(remote_only=True).items())
94
- mf_settings["sql.settings"] = sql_settings
95
- mf_opts = {
96
- "odps.maxframe.settings": json.dumps(mf_settings),
97
- "odps.maxframe.output_format": self._output_format,
98
- "odps.service.endpoint": self._service_endpoint,
99
- }
100
- if mf_version:
101
- mf_opts["odps.maxframe.client_version"] = mf_version
102
- settings.update(mf_opts)
103
- self.properties["settings"] = json.dumps(settings)
104
- return super().serial()
105
-
106
-
107
57
  class MaxFrameInstanceCaller(MaxFrameServiceCaller):
108
58
  _instance: Optional[Instance]
109
59
 
@@ -159,13 +109,31 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
159
109
  f"Serialization format {self._output_format} not supported"
160
110
  )
161
111
 
162
- def create_session(self) -> SessionInfo:
112
+ def _create_maxframe_task(self) -> MaxFrameTask:
163
113
  task = MaxFrameTask(
164
114
  name=self._task_name,
165
115
  major_version=self._major_version,
166
- output_format=self._output_format,
167
116
  service_endpoint=self._odps_entry.endpoint,
168
117
  )
118
+
119
+ # merge sql options
120
+ sql_settings = (odps_options.sql.settings or {}).copy()
121
+ sql_settings.update(options.sql.settings or {})
122
+
123
+ mf_settings = dict(options.to_dict(remote_only=True).items())
124
+ mf_settings["sql.settings"] = sql_settings
125
+
126
+ mf_opts = {
127
+ "odps.maxframe.settings": json.dumps(mf_settings),
128
+ "odps.maxframe.output_format": self._output_format,
129
+ }
130
+ if mf_version:
131
+ mf_opts["odps.maxframe.client_version"] = mf_version
132
+ task.update_settings(mf_opts)
133
+ return task
134
+
135
+ def create_session(self) -> SessionInfo:
136
+ task = self._create_maxframe_task()
169
137
  if not self._nested:
170
138
  self._task_name = task.name
171
139
  project = self._odps_entry.get_project(self._project)
@@ -278,6 +246,11 @@ class MaxFrameInstanceCaller(MaxFrameServiceCaller):
278
246
  self._task_name, MAXFRAME_TASK_DECREF_METHOD, json.dumps(req_data)
279
247
  )
280
248
 
249
+ def get_logview_address(self, dag_id=None, hours=None) -> Optional[str]:
250
+ hours = hours or options.session.logview_hours
251
+ subquery_suffix = f"&subQuery={dag_id}" if dag_id else ""
252
+ return self._instance.get_logview_address(hours) + subquery_suffix
253
+
281
254
 
282
255
  class MaxFrameTaskSession(MaxFrameSession):
283
256
  schemes = [ODPS_SESSION_INSECURE_SCHEME, ODPS_SESSION_SECURE_SCHEME]
@@ -99,9 +99,12 @@ def test_simple_run_dataframe(start_mock_session):
99
99
  corner_top, corner_bottom = ExecutableTuple([df.iloc[:10], df.iloc[-10:]]).fetch()
100
100
  assert len(corner_top) == len(corner_bottom) == 10
101
101
 
102
- # check ellipsis mark in DataFrame errors
102
+ # check ellipsis mark in DataFrame reprs
103
103
  df_str_repr = str(df)
104
104
  assert ".." in df_str_repr
105
+ # check ellipsis mark in Series reprs
106
+ series_str_repr = str(df.A.execute())
107
+ assert ".." in series_str_repr
105
108
 
106
109
  key = df.key
107
110
  assert odps_entry.exist_table(
@@ -226,3 +229,27 @@ def test_run_remote_error(start_mock_session):
226
229
 
227
230
  with pytest.raises((ValueError, RemoteException)):
228
231
  v.execute()
232
+
233
+
234
+ def test_pivot_dataframe(start_mock_session):
235
+ pd_df = pd.DataFrame(
236
+ {
237
+ "A": "foo foo foo foo foo bar bar bar bar".split(),
238
+ "B": "one one one two two one one two two".split(),
239
+ "C": "small large large small small large small small large".split(),
240
+ "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
241
+ "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
242
+ }
243
+ )
244
+ df = md.DataFrame(pd_df)
245
+ pivot = df.pivot_table(values="D", index=["A", "B"], columns=["C"], aggfunc="sum")
246
+ executed = pivot.execute()
247
+ assert pivot.shape == (2, 4)
248
+ pd.testing.assert_index_equal(
249
+ pivot.dtypes.index, pd.Index(["large", "small"], name="C")
250
+ )
251
+
252
+ expected = pd_df.pivot_table(
253
+ values="D", index=["A", "B"], columns=["C"], aggfunc="sum"
254
+ )
255
+ pd.testing.assert_frame_equal(executed.to_pandas(), expected)