maxframe 1.0.0rc4__cp39-cp39-win_amd64.whl → 1.1.0__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. More details are available on the original diff page.

Files changed (83)
  1. maxframe/_utils.cp39-win_amd64.pyd +0 -0
  2. maxframe/config/config.py +3 -0
  3. maxframe/conftest.py +9 -2
  4. maxframe/core/graph/core.cp39-win_amd64.pyd +0 -0
  5. maxframe/core/operator/base.py +2 -0
  6. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
  7. maxframe/dataframe/core.py +24 -2
  8. maxframe/dataframe/datasource/read_odps_query.py +63 -34
  9. maxframe/dataframe/datasource/tests/test_datasource.py +59 -7
  10. maxframe/dataframe/extensions/__init__.py +5 -0
  11. maxframe/dataframe/extensions/apply_chunk.py +649 -0
  12. maxframe/dataframe/extensions/flatjson.py +131 -0
  13. maxframe/dataframe/extensions/flatmap.py +28 -40
  14. maxframe/dataframe/extensions/reshuffle.py +1 -1
  15. maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
  16. maxframe/dataframe/extensions/tests/test_extensions.py +46 -2
  17. maxframe/dataframe/groupby/__init__.py +1 -0
  18. maxframe/dataframe/groupby/aggregation.py +1 -0
  19. maxframe/dataframe/groupby/apply.py +9 -1
  20. maxframe/dataframe/groupby/core.py +1 -1
  21. maxframe/dataframe/groupby/fill.py +4 -1
  22. maxframe/dataframe/groupby/getitem.py +6 -0
  23. maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
  24. maxframe/dataframe/groupby/transform.py +8 -2
  25. maxframe/dataframe/indexing/loc.py +6 -4
  26. maxframe/dataframe/merge/__init__.py +9 -1
  27. maxframe/dataframe/merge/concat.py +41 -31
  28. maxframe/dataframe/merge/merge.py +1 -1
  29. maxframe/dataframe/merge/tests/test_merge.py +3 -1
  30. maxframe/dataframe/misc/apply.py +3 -0
  31. maxframe/dataframe/misc/drop_duplicates.py +5 -1
  32. maxframe/dataframe/misc/map.py +3 -1
  33. maxframe/dataframe/misc/tests/test_misc.py +24 -2
  34. maxframe/dataframe/misc/transform.py +22 -13
  35. maxframe/dataframe/reduction/__init__.py +3 -0
  36. maxframe/dataframe/reduction/aggregation.py +1 -0
  37. maxframe/dataframe/reduction/median.py +56 -0
  38. maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
  39. maxframe/dataframe/statistics/quantile.py +8 -2
  40. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  41. maxframe/dataframe/tests/test_utils.py +60 -0
  42. maxframe/dataframe/utils.py +110 -7
  43. maxframe/dataframe/window/expanding.py +5 -3
  44. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  45. maxframe/io/objects/tests/test_object_io.py +39 -12
  46. maxframe/io/odpsio/arrow.py +30 -2
  47. maxframe/io/odpsio/schema.py +23 -5
  48. maxframe/io/odpsio/tableio.py +26 -110
  49. maxframe/io/odpsio/tests/test_schema.py +40 -0
  50. maxframe/io/odpsio/tests/test_tableio.py +5 -5
  51. maxframe/io/odpsio/tests/test_volumeio.py +35 -11
  52. maxframe/io/odpsio/volumeio.py +27 -3
  53. maxframe/learn/contrib/__init__.py +3 -2
  54. maxframe/learn/contrib/llm/__init__.py +16 -0
  55. maxframe/learn/contrib/llm/core.py +54 -0
  56. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  57. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  58. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  59. maxframe/learn/contrib/llm/text.py +42 -0
  60. maxframe/lib/mmh3.cp39-win_amd64.pyd +0 -0
  61. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  62. maxframe/opcodes.py +7 -1
  63. maxframe/serialization/core.cp39-win_amd64.pyd +0 -0
  64. maxframe/serialization/core.pyx +13 -1
  65. maxframe/serialization/pandas.py +50 -20
  66. maxframe/serialization/serializables/core.py +24 -5
  67. maxframe/serialization/serializables/field_type.py +4 -1
  68. maxframe/serialization/serializables/tests/test_serializable.py +8 -1
  69. maxframe/serialization/tests/test_serial.py +2 -1
  70. maxframe/tensor/__init__.py +19 -7
  71. maxframe/tests/utils.py +16 -0
  72. maxframe/udf.py +27 -0
  73. maxframe/utils.py +36 -8
  74. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
  75. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/RECORD +83 -72
  76. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/WHEEL +1 -1
  77. maxframe_client/clients/framedriver.py +4 -1
  78. maxframe_client/fetcher.py +18 -2
  79. maxframe_client/session/odps.py +23 -10
  80. maxframe_client/session/task.py +2 -24
  81. maxframe_client/session/tests/test_task.py +0 -4
  82. maxframe_client/tests/test_session.py +30 -10
  83. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,42 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from typing import Any, Dict
15
+
16
+ from ....dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
17
+ from .core import LLM
18
+
19
+
20
+ class MultiModalLLM(LLM):
21
+ def generate(
22
+ self,
23
+ data,
24
+ prompt_template: Dict[str, Any],
25
+ params: Dict[str, Any] = None,
26
+ ):
27
+ raise NotImplementedError
28
+
29
+
30
+ def generate(
31
+ data,
32
+ model: MultiModalLLM,
33
+ prompt_template: Dict[str, Any],
34
+ params: Dict[str, Any] = None,
35
+ ):
36
+ if not isinstance(data, DATAFRAME_TYPE) and not isinstance(data, SERIES_TYPE):
37
+ raise ValueError("data must be a maxframe dataframe or series object")
38
+ if not isinstance(model, MultiModalLLM):
39
+ raise ValueError("model must be a MultiModalLLM object")
40
+ params = params if params is not None else dict()
41
+ model.validate_params(params)
42
+ return model.generate(data, prompt_template, params)
@@ -0,0 +1,42 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from typing import Any, Dict
15
+
16
+ from ....dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
17
+ from .core import LLM
18
+
19
+
20
+ class TextLLM(LLM):
21
+ def generate(
22
+ self,
23
+ data,
24
+ prompt_template: Dict[str, Any],
25
+ params: Dict[str, Any] = None,
26
+ ):
27
+ raise NotImplementedError
28
+
29
+
30
+ def generate(
31
+ data,
32
+ model: TextLLM,
33
+ prompt_template: Dict[str, Any],
34
+ params: Dict[str, Any] = None,
35
+ ):
36
+ if not isinstance(data, DATAFRAME_TYPE) and not isinstance(data, SERIES_TYPE):
37
+ raise ValueError("data must be a maxframe dataframe or series object")
38
+ if not isinstance(model, TextLLM):
39
+ raise ValueError("model must be a TextLLM object")
40
+ params = params if params is not None else dict()
41
+ model.validate_params(params)
42
+ return model.generate(data, prompt_template, params)
Binary file (presumably maxframe/lib/mmh3.cp39-win_amd64.pyd, item 60 in the list above; contents not shown)
@@ -55,13 +55,13 @@ def test_sparse_creation():
55
55
  s = SparseNDArray(s1_data)
56
56
  assert s.ndim == 2
57
57
  assert isinstance(s, SparseMatrix)
58
- assert_array_equal(s.toarray(), s1_data.A)
59
- assert_array_equal(s.todense(), s1_data.A)
58
+ assert_array_equal(s.toarray(), s1_data.toarray())
59
+ assert_array_equal(s.todense(), s1_data.toarray())
60
60
 
61
61
  ss = pickle.loads(pickle.dumps(s))
62
62
  assert s == ss
63
- assert_array_equal(ss.toarray(), s1_data.A)
64
- assert_array_equal(ss.todense(), s1_data.A)
63
+ assert_array_equal(ss.toarray(), s1_data.toarray())
64
+ assert_array_equal(ss.todense(), s1_data.toarray())
65
65
 
66
66
  v = SparseNDArray(v1, shape=(3,))
67
67
  assert s.ndim
@@ -331,12 +331,12 @@ def test_sparse_dot():
331
331
 
332
332
  assert_array_equal(mls.dot(s1, v1_s), s1.dot(v1_data))
333
333
  assert_array_equal(mls.dot(s2, v1_s), s2.dot(v1_data))
334
- assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.A))
335
- assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.A))
334
+ assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.toarray()))
335
+ assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.toarray()))
336
336
  assert_array_equal(mls.dot(v1_s, v1_s), v1_data.dot(v1_data), almost=True)
337
337
  assert_array_equal(mls.dot(v2_s, v2_s), v2_data.dot(v2_data), almost=True)
338
338
 
339
- assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.A))
339
+ assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.toarray()))
340
340
  assert_array_equal(mls.dot(v1_s, v1_s, sparse=False), v1_data.dot(v1_data))
341
341
 
342
342
 
@@ -390,7 +390,7 @@ def test_sparse_fill_diagonal():
390
390
  arr = SparseNDArray(s1)
391
391
  arr.fill_diagonal(3)
392
392
 
393
- expected = s1.copy().A
393
+ expected = s1.copy().toarray()
394
394
  np.fill_diagonal(expected, 3)
395
395
 
396
396
  np.testing.assert_array_equal(arr.toarray(), expected)
@@ -399,7 +399,7 @@ def test_sparse_fill_diagonal():
399
399
  arr = SparseNDArray(s1)
400
400
  arr.fill_diagonal(3, wrap=True)
401
401
 
402
- expected = s1.copy().A
402
+ expected = s1.copy().toarray()
403
403
  np.fill_diagonal(expected, 3, wrap=True)
404
404
 
405
405
  np.testing.assert_array_equal(arr.toarray(), expected)
@@ -408,7 +408,7 @@ def test_sparse_fill_diagonal():
408
408
  arr = SparseNDArray(s1)
409
409
  arr.fill_diagonal([1, 2, 3])
410
410
 
411
- expected = s1.copy().A
411
+ expected = s1.copy().toarray()
412
412
  np.fill_diagonal(expected, [1, 2, 3])
413
413
 
414
414
  np.testing.assert_array_equal(arr.toarray(), expected)
@@ -417,7 +417,7 @@ def test_sparse_fill_diagonal():
417
417
  arr = SparseNDArray(s1)
418
418
  arr.fill_diagonal([1, 2, 3], wrap=True)
419
419
 
420
- expected = s1.copy().A
420
+ expected = s1.copy().toarray()
421
421
  np.fill_diagonal(expected, [1, 2, 3], wrap=True)
422
422
 
423
423
  np.testing.assert_array_equal(arr.toarray(), expected)
@@ -427,7 +427,7 @@ def test_sparse_fill_diagonal():
427
427
  arr = SparseNDArray(s1)
428
428
  arr.fill_diagonal(val)
429
429
 
430
- expected = s1.copy().A
430
+ expected = s1.copy().toarray()
431
431
  np.fill_diagonal(expected, val)
432
432
 
433
433
  np.testing.assert_array_equal(arr.toarray(), expected)
@@ -437,7 +437,7 @@ def test_sparse_fill_diagonal():
437
437
  arr = SparseNDArray(s1)
438
438
  arr.fill_diagonal(val, wrap=True)
439
439
 
440
- expected = s1.copy().A
440
+ expected = s1.copy().toarray()
441
441
  np.fill_diagonal(expected, val, wrap=True)
442
442
 
443
443
  np.testing.assert_array_equal(arr.toarray(), expected)
@@ -447,7 +447,7 @@ def test_sparse_fill_diagonal():
447
447
  arr = SparseNDArray(s1)
448
448
  arr.fill_diagonal(val)
449
449
 
450
- expected = s1.copy().A
450
+ expected = s1.copy().toarray()
451
451
  np.fill_diagonal(expected, val)
452
452
 
453
453
  np.testing.assert_array_equal(arr.toarray(), expected)
@@ -457,7 +457,7 @@ def test_sparse_fill_diagonal():
457
457
  arr = SparseNDArray(s1)
458
458
  arr.fill_diagonal(val, wrap=True)
459
459
 
460
- expected = s1.copy().A
460
+ expected = s1.copy().toarray()
461
461
  np.fill_diagonal(expected, val, wrap=True)
462
462
 
463
463
  np.testing.assert_array_equal(arr.toarray(), expected)
maxframe/opcodes.py CHANGED
@@ -270,6 +270,7 @@ KURTOSIS = 351
270
270
  SEM = 352
271
271
  STR_CONCAT = 353
272
272
  MAD = 354
273
+ MEDIAN = 355
273
274
 
274
275
  # tensor operator
275
276
  RESHAPE = 401
@@ -377,7 +378,6 @@ DROP_DUPLICATES = 728
377
378
  MELT = 729
378
379
  RENAME = 731
379
380
  INSERT = 732
380
- MAP_CHUNK = 733
381
381
  CARTESIAN_CHUNK = 734
382
382
  EXPLODE = 735
383
383
  REPLACE = 736
@@ -392,6 +392,10 @@ PIVOT_TABLE = 744
392
392
 
393
393
  FUSE = 801
394
394
 
395
+ # LLM
396
+ DASHSCOPE_TEXT_GENERATION = 810
397
+ DASHSCOPE_MULTI_MODAL_GENERATION = 811
398
+
395
399
  # table like input for tensor
396
400
  TABLE_COO = 1003
397
401
  # store tensor as coo format
@@ -569,6 +573,8 @@ CHOLESKY_FUSE = 999988
569
573
  # MaxFrame-dedicated functions
570
574
  DATAFRAME_RESHUFFLE = 10001
571
575
  FLATMAP = 10002
576
+ FLATJSON = 10003
577
+ APPLY_CHUNK = 10004
572
578
 
573
579
  # MaxFrame internal operators
574
580
  DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
@@ -37,7 +37,7 @@ from .._utils import NamedType
37
37
  from .._utils cimport TypeDispatcher
38
38
 
39
39
  from ..lib import wrapped_pickle as pickle
40
- from ..utils import arrow_type_from_str
40
+ from ..utils import NoDefault, arrow_type_from_str, no_default
41
41
 
42
42
  try:
43
43
  from pandas import ArrowDtype
@@ -94,6 +94,7 @@ cdef:
94
94
  int COMPLEX_SERIALIZER = 12
95
95
  int SLICE_SERIALIZER = 13
96
96
  int REGEX_SERIALIZER = 14
97
+ int NO_DEFAULT_SERIALIZER = 15
97
98
  int PLACEHOLDER_SERIALIZER = 4096
98
99
 
99
100
 
@@ -803,6 +804,16 @@ cdef class RegexSerializer(Serializer):
803
804
  return re.compile((<bytes>(subs[0])).decode(), serialized[0])
804
805
 
805
806
 
807
+ cdef class NoDefaultSerializer(Serializer):
808
+ serializer_id = NO_DEFAULT_SERIALIZER
809
+
810
+ cpdef serial(self, object obj, dict context):
811
+ return [], [], True
812
+
813
+ cpdef deserial(self, list obj, dict context, list subs):
814
+ return no_default
815
+
816
+
806
817
  cdef class Placeholder:
807
818
  """
808
819
  Placeholder object to reduce duplicated serialization
@@ -857,6 +868,7 @@ DtypeSerializer.register(ExtensionDtype)
857
868
  ComplexSerializer.register(complex)
858
869
  SliceSerializer.register(slice)
859
870
  RegexSerializer.register(re.Pattern)
871
+ NoDefaultSerializer.register(NoDefault)
860
872
  PlaceholderSerializer.register(Placeholder)
861
873
 
862
874
 
@@ -134,8 +134,10 @@ class ArraySerializer(Serializer):
134
134
  data_parts = [obj.tolist()]
135
135
  else:
136
136
  data_parts = [obj.to_numpy().tolist()]
137
- else:
137
+ elif hasattr(obj, "_data"):
138
138
  data_parts = [getattr(obj, "_data")]
139
+ else:
140
+ data_parts = [getattr(obj, "_pa_array")]
139
141
  return [ser_type], [dtype] + data_parts, False
140
142
 
141
143
  def deserial(self, serialized: List, context: Dict, subs: List):
@@ -155,38 +157,66 @@ class PdTimestampSerializer(Serializer):
155
157
  else:
156
158
  zone_info = []
157
159
  ts = obj.to_pydatetime().timestamp()
158
- return (
159
- [int(ts), obj.microsecond, obj.nanosecond],
160
- zone_info,
161
- bool(zone_info),
162
- )
160
+ elements = [int(ts), obj.microsecond, obj.nanosecond]
161
+ if hasattr(obj, "unit"):
162
+ elements.append(str(obj.unit))
163
+ return elements, zone_info, bool(zone_info)
163
164
 
164
165
  def deserial(self, serialized: List, context: Dict, subs: List):
165
166
  if subs:
166
- val = pd.Timestamp.utcfromtimestamp(serialized[0]).replace(
167
- microsecond=serialized[1], nanosecond=serialized[2]
168
- )
169
- val = val.replace(tzinfo=datetime.timezone.utc).tz_convert(subs[0])
167
+ pydt = datetime.datetime.utcfromtimestamp(serialized[0])
168
+ kwargs = {
169
+ "year": pydt.year,
170
+ "month": pydt.month,
171
+ "day": pydt.day,
172
+ "hour": pydt.hour,
173
+ "minute": pydt.minute,
174
+ "second": pydt.second,
175
+ "microsecond": serialized[1],
176
+ "nanosecond": serialized[2],
177
+ "tzinfo": datetime.timezone.utc,
178
+ }
179
+ if len(serialized) > 3:
180
+ kwargs["unit"] = serialized[3]
181
+ val = pd.Timestamp(**kwargs).tz_convert(subs[0])
170
182
  else:
171
- val = pd.Timestamp.fromtimestamp(serialized[0]).replace(
172
- microsecond=serialized[1], nanosecond=serialized[2]
173
- )
183
+ pydt = datetime.datetime.fromtimestamp(serialized[0])
184
+ kwargs = {
185
+ "year": pydt.year,
186
+ "month": pydt.month,
187
+ "day": pydt.day,
188
+ "hour": pydt.hour,
189
+ "minute": pydt.minute,
190
+ "second": pydt.second,
191
+ "microsecond": serialized[1],
192
+ "nanosecond": serialized[2],
193
+ }
194
+ if len(serialized) >= 4:
195
+ kwargs["unit"] = serialized[3]
196
+ val = pd.Timestamp(**kwargs)
174
197
  return val
175
198
 
176
199
 
177
200
  class PdTimedeltaSerializer(Serializer):
178
201
  def serial(self, obj: pd.Timedelta, context: Dict):
179
- return [int(obj.seconds), obj.microseconds, obj.nanoseconds, obj.days], [], True
202
+ elements = [int(obj.seconds), obj.microseconds, obj.nanoseconds, obj.days]
203
+ if hasattr(obj, "unit"):
204
+ elements.append(str(obj.unit))
205
+ return elements, [], True
180
206
 
181
207
  def deserial(self, serialized: List, context: Dict, subs: List):
182
208
  days = 0 if len(serialized) < 4 else serialized[3]
209
+ unit = None if len(serialized) < 5 else serialized[4]
183
210
  seconds, microseconds, nanoseconds = serialized[:3]
184
- return pd.Timedelta(
185
- days=days,
186
- seconds=seconds,
187
- microseconds=microseconds,
188
- nanoseconds=nanoseconds,
189
- )
211
+ kwargs = {
212
+ "days": days,
213
+ "seconds": seconds,
214
+ "microseconds": microseconds,
215
+ "nanoseconds": nanoseconds,
216
+ }
217
+ if unit is not None:
218
+ kwargs["unit"] = unit
219
+ return pd.Timedelta(**kwargs)
190
220
 
191
221
 
192
222
  class NoDefaultSerializer(Serializer):
@@ -19,6 +19,7 @@ from typing import Any, Dict, List, Optional, Tuple, Type
19
19
  import msgpack
20
20
 
21
21
  from ...lib.mmh3 import hash
22
+ from ...utils import no_default
22
23
  from ..core import Placeholder, Serializer, buffered, load_type
23
24
  from .field import Field
24
25
  from .field_type import DictType, ListType, PrimitiveFieldType, TupleType
@@ -211,6 +212,22 @@ class _NoFieldValue:
211
212
  _no_field_value = _NoFieldValue()
212
213
 
213
214
 
215
+ def _to_primitive_placeholder(v: Any) -> Any:
216
+ if v is _no_field_value or v is no_default:
217
+ return {}
218
+ return v
219
+
220
+
221
+ def _restore_primitive_placeholder(v: Any) -> Any:
222
+ if type(v) is dict:
223
+ if v == {}:
224
+ return _no_field_value
225
+ else:
226
+ return v
227
+ else:
228
+ return v
229
+
230
+
214
231
  class SerializableSerializer(Serializer):
215
232
  """
216
233
  Leverage DictSerializer to perform serde.
@@ -241,9 +258,7 @@ class SerializableSerializer(Serializer):
241
258
  else:
242
259
  primitive_vals = self._get_field_values(obj, obj._PRIMITIVE_FIELDS)
243
260
  # replace _no_field_value as {} to make them msgpack-serializable
244
- primitive_vals = [
245
- v if v is not _no_field_value else {} for v in primitive_vals
246
- ]
261
+ primitive_vals = [_to_primitive_placeholder(v) for v in primitive_vals]
247
262
  if obj._cache_primitive_serial:
248
263
  primitive_vals = msgpack.dumps(primitive_vals)
249
264
  _primitive_serial_cache[obj] = primitive_vals
@@ -311,7 +326,9 @@ class SerializableSerializer(Serializer):
311
326
  cls_fields = server_fields[server_field_num : field_num + count]
312
327
  cls_values = values[field_num : field_num + count]
313
328
  for field, value in zip(cls_fields, cls_values):
314
- if not is_primitive or value != {}:
329
+ if is_primitive:
330
+ value = _restore_primitive_placeholder(value)
331
+ if not is_primitive or value is not _no_field_value:
315
332
  cls._set_field_value(obj, field, value)
316
333
  field_num += count
317
334
  try:
@@ -356,7 +373,9 @@ class SerializableSerializer(Serializer):
356
373
  server_fields + deprecated_fields, key=lambda f: f.name
357
374
  )
358
375
  for field, value in zip(server_fields, values):
359
- if not is_primitive or value != {}:
376
+ if is_primitive:
377
+ value = _restore_primitive_placeholder(value)
378
+ if not is_primitive or value is not _no_field_value:
360
379
  try:
361
380
  cls._set_field_value(obj, field, value)
362
381
  except AttributeError: # pragma: no cover
@@ -46,6 +46,9 @@ class PrimitiveType(Enum):
46
46
  complex128 = 25
47
47
 
48
48
 
49
+ _np_unicode = np.unicode_ if hasattr(np, "unicode_") else np.str_
50
+
51
+
49
52
  _primitive_type_to_valid_types = {
50
53
  PrimitiveType.bool: (bool, np.bool_),
51
54
  PrimitiveType.int8: (int, np.int8),
@@ -60,7 +63,7 @@ _primitive_type_to_valid_types = {
60
63
  PrimitiveType.float32: (float, np.float32),
61
64
  PrimitiveType.float64: (float, np.float64),
62
65
  PrimitiveType.bytes: (bytes, np.bytes_),
63
- PrimitiveType.string: (str, np.unicode_),
66
+ PrimitiveType.string: (str, _np_unicode),
64
67
  PrimitiveType.complex64: (complex, np.complex64),
65
68
  PrimitiveType.complex128: (complex, np.complex128),
66
69
  }
@@ -21,6 +21,7 @@ import pytest
21
21
 
22
22
  from ....core import EntityData
23
23
  from ....lib.wrapped_pickle import switch_unpickle
24
+ from ....utils import no_default
24
25
  from ... import deserialize, serialize
25
26
  from .. import (
26
27
  AnyField,
@@ -143,6 +144,7 @@ class MySerializable(Serializable):
143
144
  oneof1_val=f"{__name__}.MySerializable",
144
145
  oneof2_val=MySimpleSerializable,
145
146
  )
147
+ _no_default_val = Float64Field("no_default_val", default=no_default)
146
148
 
147
149
 
148
150
  @pytest.mark.parametrize("set_is_ci", [False, True], indirect=True)
@@ -187,6 +189,7 @@ def test_serializable(set_is_ci):
187
189
  _dict_val={"a": b"bytes_value"},
188
190
  _ref_val=MySerializable(),
189
191
  _oneof_val=MySerializable(_id="2"),
192
+ _no_default_val=no_default,
190
193
  )
191
194
 
192
195
  header, buffers = serialize(my_serializable)
@@ -234,7 +237,11 @@ def _assert_serializable_eq(my_serializable, my_serializable2):
234
237
  if not hasattr(my_serializable, field.name):
235
238
  continue
236
239
  expect_value = getattr(my_serializable, field_name)
237
- actual_value = getattr(my_serializable2, field_name)
240
+ if expect_value is no_default:
241
+ assert not hasattr(my_serializable2, field.name)
242
+ continue
243
+ else:
244
+ actual_value = getattr(my_serializable2, field_name)
238
245
  if isinstance(expect_value, np.ndarray):
239
246
  np.testing.assert_array_equal(expect_value, actual_value)
240
247
  elif isinstance(expect_value, pd.DataFrame):
@@ -42,7 +42,7 @@ except ImportError:
42
42
  from ...lib.sparse import SparseMatrix
43
43
  from ...lib.wrapped_pickle import switch_unpickle
44
44
  from ...tests.utils import require_cudf, require_cupy
45
- from ...utils import lazy_import
45
+ from ...utils import lazy_import, no_default
46
46
  from .. import (
47
47
  PickleContainer,
48
48
  RemoteException,
@@ -90,6 +90,7 @@ class CustomNamedTuple(NamedTuple):
90
90
  pd.Timedelta(102.234154131),
91
91
  {"abc": 5.6, "def": [3.4], "gh": None, "ijk": {}},
92
92
  OrderedDict([("abcd", 5.6)]),
93
+ no_default,
93
94
  ],
94
95
  )
95
96
  @switch_unpickle
@@ -191,11 +191,6 @@ from .ufunc import ufunc
191
191
  # isort: off
192
192
  # noinspection PyUnresolvedReferences
193
193
  from numpy import (
194
- NAN,
195
- NINF,
196
- AxisError,
197
- Inf,
198
- NaN,
199
194
  e,
200
195
  errstate,
201
196
  geterr,
@@ -206,12 +201,21 @@ from numpy import (
206
201
  seterr,
207
202
  )
208
203
 
204
+ try:
205
+ from numpy.exceptions import AxisError
206
+ except ImportError:
207
+ from numpy import AxisError
208
+
209
+ NAN = nan
210
+ NINF = -inf
211
+ Inf = inf
212
+ NaN = nan
213
+
209
214
  # import numpy types
210
215
  # noinspection PyUnresolvedReferences
211
216
  from numpy import (
212
217
  bool_ as bool,
213
218
  bytes_,
214
- cfloat,
215
219
  character,
216
220
  complex64,
217
221
  complex128,
@@ -242,9 +246,17 @@ from numpy import (
242
246
  uint16,
243
247
  uint32,
244
248
  uint64,
245
- unicode_,
246
249
  unsignedinteger,
247
250
  void,
248
251
  )
249
252
 
253
+ try:
254
+ from numpy import cfloat
255
+ except ImportError:
256
+ from numpy import cdouble as cfloat
257
+ try:
258
+ from numpy import str_ as unicode_
259
+ except ImportError:
260
+ from numpy import unicode_
261
+
250
262
  del fetch, ufunc
maxframe/tests/utils.py CHANGED
@@ -18,11 +18,13 @@ import hashlib
18
18
  import os
19
19
  import queue
20
20
  import socket
21
+ import time
21
22
  import types
22
23
  from threading import Thread
23
24
  from typing import Dict, List, Optional, Set, Tuple
24
25
 
25
26
  import pytest
27
+ from odps import ODPS
26
28
  from tornado import netutil
27
29
 
28
30
  from ..core import Tileable, TileableGraph
@@ -171,3 +173,17 @@ def get_test_unique_name(size=None):
171
173
  if size:
172
174
  digest = digest[:size]
173
175
  return digest + "_" + str(os.getpid())
176
+
177
+
178
+ def assert_mf_index_dtype(idx_obj, dtype):
179
+ from ..dataframe.core import IndexValue
180
+
181
+ assert isinstance(idx_obj, IndexValue.IndexBase) and idx_obj.dtype == dtype
182
+
183
+
184
+ def ensure_table_deleted(odps_entry: ODPS, table_name: str) -> None:
185
+ retry_times = 20
186
+ while odps_entry.exist_table(table_name) and retry_times > 0:
187
+ time.sleep(1)
188
+ retry_times -= 1
189
+ assert not odps_entry.exist_table(table_name)
maxframe/udf.py CHANGED
@@ -19,6 +19,7 @@ from odps.models import Resource
19
19
 
20
20
  from .serialization.serializables import (
21
21
  BoolField,
22
+ DictField,
22
23
  FieldTypes,
23
24
  FunctionField,
24
25
  ListField,
@@ -54,6 +55,10 @@ class MarkedFunction(Serializable):
54
55
  func = FunctionField("func")
55
56
  resources = ListField("resources", FieldTypes.string, default_factory=list)
56
57
  pythonpacks = ListField("pythonpacks", FieldTypes.reference, default_factory=list)
58
+ expect_engine = StringField("expect_engine", default=None)
59
+ expect_resources = DictField(
60
+ "expect_resources", FieldTypes.string, default_factory=dict
61
+ )
57
62
 
58
63
  def __init__(self, func: Optional[Callable] = None, **kw):
59
64
  super().__init__(func=func, **kw)
@@ -120,6 +125,28 @@ def with_python_requirements(
120
125
  return func_wrapper
121
126
 
122
127
 
128
+ def with_running_options(
129
+ *,
130
+ engine: Optional[str] = None,
131
+ cpu: Optional[int] = None,
132
+ memory: Optional[int] = None,
133
+ **kwargs,
134
+ ):
135
+ engine = engine.upper() if engine else None
136
+ resources = {"cpu": cpu, "memory": memory, **kwargs}
137
+
138
+ def func_wrapper(func):
139
+ if all(v is None for v in (engine, cpu, memory)):
140
+ return func
141
+ if isinstance(func, MarkedFunction):
142
+ func.expect_engine = engine
143
+ func.expect_resources = resources
144
+ return func
145
+ return MarkedFunction(func, expect_engine=engine, expect_resources=resources)
146
+
147
+ return func_wrapper
148
+
149
+
123
150
  with_resource_libraries = with_resources
124
151
 
125
152