maxframe 1.2.0-cp310-cp310-win32.whl → 1.3.0-cp310-cp310-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of maxframe might be problematic.

Files changed (72)
  1. maxframe/_utils.cp310-win32.pyd +0 -0
  2. maxframe/codegen.py +70 -21
  3. maxframe/config/config.py +6 -0
  4. maxframe/core/accessor.py +1 -0
  5. maxframe/core/graph/core.cp310-win32.pyd +0 -0
  6. maxframe/dataframe/accessors/__init__.py +1 -1
  7. maxframe/dataframe/accessors/dict_/accessor.py +1 -0
  8. maxframe/dataframe/accessors/dict_/length.py +1 -0
  9. maxframe/dataframe/accessors/dict_/setitem.py +1 -0
  10. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +5 -7
  11. maxframe/dataframe/accessors/list_/__init__.py +37 -0
  12. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  13. maxframe/dataframe/accessors/list_/getitem.py +135 -0
  14. maxframe/dataframe/accessors/list_/length.py +73 -0
  15. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  16. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +79 -0
  17. maxframe/dataframe/accessors/plotting/__init__.py +2 -0
  18. maxframe/dataframe/accessors/string_/__init__.py +1 -0
  19. maxframe/dataframe/datasource/read_odps_query.py +1 -1
  20. maxframe/dataframe/datasource/tests/test_datasource.py +4 -0
  21. maxframe/dataframe/datastore/to_odps.py +6 -0
  22. maxframe/dataframe/extensions/accessor.py +1 -0
  23. maxframe/dataframe/extensions/apply_chunk.py +34 -21
  24. maxframe/dataframe/extensions/flatmap.py +8 -1
  25. maxframe/dataframe/extensions/tests/test_apply_chunk.py +2 -1
  26. maxframe/dataframe/extensions/tests/test_extensions.py +1 -0
  27. maxframe/dataframe/merge/concat.py +7 -4
  28. maxframe/dataframe/merge/merge.py +1 -0
  29. maxframe/dataframe/merge/tests/test_merge.py +97 -47
  30. maxframe/dataframe/missing/tests/test_missing.py +1 -0
  31. maxframe/dataframe/tests/test_utils.py +7 -0
  32. maxframe/dataframe/ufunc/ufunc.py +1 -0
  33. maxframe/dataframe/utils.py +3 -0
  34. maxframe/io/odpsio/schema.py +1 -0
  35. maxframe/learn/contrib/__init__.py +2 -4
  36. maxframe/learn/contrib/llm/__init__.py +1 -0
  37. maxframe/learn/contrib/llm/core.py +31 -10
  38. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  39. maxframe/learn/contrib/llm/models/dashscope.py +4 -3
  40. maxframe/learn/contrib/llm/models/managed.py +39 -0
  41. maxframe/learn/contrib/llm/multi_modal.py +1 -0
  42. maxframe/learn/contrib/llm/text.py +252 -8
  43. maxframe/learn/contrib/models.py +77 -0
  44. maxframe/learn/contrib/utils.py +1 -0
  45. maxframe/learn/contrib/xgboost/__init__.py +8 -1
  46. maxframe/learn/contrib/xgboost/classifier.py +15 -4
  47. maxframe/learn/contrib/xgboost/core.py +108 -1
  48. maxframe/learn/contrib/xgboost/dmatrix.py +1 -1
  49. maxframe/learn/contrib/xgboost/predict.py +8 -3
  50. maxframe/learn/contrib/xgboost/regressor.py +15 -1
  51. maxframe/learn/contrib/xgboost/train.py +5 -4
  52. maxframe/lib/dtypes_extension/__init__.py +2 -1
  53. maxframe/lib/dtypes_extension/dtypes.py +17 -42
  54. maxframe/lib/dtypes_extension/tests/test_dtypes.py +11 -31
  55. maxframe/lib/mmh3.cp310-win32.pyd +0 -0
  56. maxframe/opcodes.py +19 -0
  57. maxframe/serialization/__init__.py +1 -0
  58. maxframe/serialization/core.cp310-win32.pyd +0 -0
  59. maxframe/serialization/core.pyx +12 -1
  60. maxframe/serialization/numpy.py +12 -4
  61. maxframe/serialization/serializables/tests/test_serializable.py +13 -2
  62. maxframe/serialization/tests/test_serial.py +2 -0
  63. maxframe/tensor/merge/concatenate.py +1 -0
  64. maxframe/tensor/misc/unique.py +11 -10
  65. maxframe/tensor/reshape/reshape.py +4 -1
  66. maxframe/utils.py +4 -0
  67. {maxframe-1.2.0.dist-info → maxframe-1.3.0.dist-info}/METADATA +2 -2
  68. {maxframe-1.2.0.dist-info → maxframe-1.3.0.dist-info}/RECORD +72 -64
  69. {maxframe-1.2.0.dist-info → maxframe-1.3.0.dist-info}/WHEEL +1 -1
  70. maxframe_client/session/odps.py +3 -0
  71. maxframe_client/session/tests/test_task.py +1 -0
  72. {maxframe-1.2.0.dist-info → maxframe-1.3.0.dist-info}/top_level.txt +0 -0
maxframe/learn/contrib/xgboost/classifier.py CHANGED
@@ -12,9 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Union
+
 import numpy as np
 
-from ....tensor import argmax, transpose
+from .... import tensor as mt
 from ....tensor.merge.vstack import _vstack
 from ..utils import make_import_error_func
 from .core import XGBScikitLearnBase, xgboost
@@ -33,6 +35,14 @@ else:
         Implementation of the scikit-learn API for XGBoost classification.
         """
 
+        def __init__(
+            self,
+            xgb_model: Union[xgboost.XGBClassifier, xgboost.Booster] = None,
+            **kwargs,
+        ):
+            super().__init__(**kwargs)
+            self._set_model(xgb_model)
+
         def fit(
             self,
             X,
@@ -46,7 +56,7 @@ else:
             **kw,
         ):
             session = kw.pop("session", None)
-            run_kwargs = kw.pop("run_kwargs", dict())
+            run_kwargs = kw.pop("run_kwargs", None) or dict()
             dtrain, evals = wrap_evaluation_matrices(
                 None,
                 X,
@@ -58,6 +68,7 @@ else:
                 base_margin_eval_set,
             )
             params = self.get_xgb_params()
+            self._n_features_in = X.shape[1]
             self.n_classes_ = num_class or 1
             if self.n_classes_ > 2:
                 params["objective"] = "multi:softprob"
@@ -81,7 +92,7 @@ else:
         def predict(self, data, **kw):
             prob = self.predict_proba(data, flag=True, **kw)
             if prob.ndim > 1:
-                prediction = argmax(prob, axis=1)
+                prediction = mt.argmax(prob, axis=1)
             else:
                 prediction = (prob > 0.5).astype(np.int64)
             return prediction
@@ -103,7 +114,7 @@ else:
             # binary logistic function
             classone_probs = prediction
             classzero_probs = 1.0 - classone_probs
-            return transpose(_vstack((classzero_probs, classone_probs)))
+            return mt.transpose(_vstack((classzero_probs, classone_probs)))
 
         @property
         def classes_(self) -> np.ndarray:
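The classifier now accepts a pre-trained model through the new xgb_model argument, uploading its booster via to_remote_model (defined in core.py below). A minimal usage sketch, assuming an initialized MaxFrame session and that XGBClassifier is exported from the package as before; X_local, y_local and df are placeholder names:

    import xgboost
    from maxframe.learn.contrib.xgboost import XGBClassifier

    # train an ordinary xgboost classifier locally (illustrative data)
    local_clf = xgboost.XGBClassifier(n_estimators=10)
    local_clf.fit(X_local, y_local)

    # wrap it as a MaxFrame classifier; prediction runs remotely
    clf = XGBClassifier(xgb_model=local_clf)
    result = clf.predict(df).execute()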
maxframe/learn/contrib/xgboost/core.py CHANGED
@@ -12,14 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Callable, Dict, List, Optional, Tuple
+import json
+import os
+import tempfile
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+import numpy as np
 
 try:
     import xgboost
 except ImportError:
     xgboost = None
 
+from ....core import OutputType
 from ...core import Model, ModelData
+from ..models import ModelApplyChunk, to_remote_model
 from .dmatrix import DMatrix
 
 
@@ -32,6 +39,33 @@ class BoosterData(ModelData):
         super().__init__(*args, **kwargs)
         self._evals_result = evals_result if evals_result is not None else dict()
 
+    @staticmethod
+    def _get_booster_score(bst, fmap=None, importance_type="weight"):
+        if not fmap:
+            tmp_file_name = ""
+        else:
+            tmp_file = tempfile.NamedTemporaryFile(delete=False)
+            tmp_file.write(fmap)
+            tmp_file.close()
+            tmp_file_name = tmp_file.name
+
+        try:
+            return bst.get_score(fmap=tmp_file_name, importance_type=importance_type)
+        finally:
+            if tmp_file_name:
+                os.unlink(tmp_file_name)
+
+    def get_score(self, fmap="", importance_type="weight"):
+        op = ModelApplyChunk(
+            func=self._get_booster_score, output_types=[OutputType.object]
+        )
+        if not fmap:
+            fmap_data = None
+        else:
+            with open(fmap, "rb") as fmap_file:
+                fmap_data = fmap_file.read()
+        return op(self, [{}], fmap=fmap_data, importance_type=importance_type)[0]
+
     def execute(self, session=None, **kw):
         # The evals_result should be fetched when BoosterData.execute() is called.
         result = super().execute(session=session, **kw)
@@ -82,6 +116,30 @@ else:
         Base class for implementing scikit-learn interface
         """
 
+        def _set_model(
+            self, xgb_model: Union[xgboost.XGBModel, xgboost.Booster] = None
+        ):
+            booster = None
+            if isinstance(xgb_model, xgboost.XGBModel):
+                booster = xgb_model.get_booster()
+            elif isinstance(xgb_model, xgboost.Booster):
+                booster = xgb_model
+
+            if booster is not None:
+                self._Booster = to_remote_model(booster, model_cls=Booster)
+
+        @classmethod
+        def _get_param_names(cls):
+            # make sure `xgb_model` not treated as a model param
+            names = super()._get_param_names()
+            if names:
+                names = [p for p in names if p != "xgb_model"]
+            return names
+
+        def __repr__(self):
+            local_model = self.fetch()
+            return repr(local_model)
+
         def fit(
             self,
             X,
@@ -157,6 +215,55 @@ else:
                 self._Booster.execute(session=session, **run_kwargs)
             return super().evals_result()
 
+        def execute(self, session=None, run_kwargs=None):
+            self._Booster.execute(session=session, run_kwargs=run_kwargs)
+            return self
+
+        def fetch(self, session=None, run_kwargs=None):
+            from xgboost import sklearn as xgb_sklearn
+
+            booster = self._Booster.fetch(session=session, run_kwargs=run_kwargs)
+            remote_bst, self._Booster = self._Booster, booster
+            try:
+                local_cls = getattr(xgb_sklearn, type(self).__name__)
+                local_model = local_cls(**self.get_params(deep=True))
+                local_model._Booster = booster
+                return local_model
+            finally:
+                self._Booster = remote_bst
+
+        @staticmethod
+        def _calc_feature_importance(bst, importance_type, n_features):
+            config = json.loads(bst.save_config())
+            bst_type = config["learner"]["gradient_booster"]["name"]
+            dft = "weight" if bst_type == "gblinear" else "gain"
+            importance_type = importance_type or dft
+            score = bst.get_score(importance_type=importance_type)
+            if bst.feature_names is None:
+                feature_names = [f"f{i}" for i in range(n_features)]
+            else:
+                feature_names = bst.feature_names
+            # gblinear returns all features so the `get` in next line is only for gbtree.
+            all_features = [score.get(f, 0.0) for f in feature_names]
+            all_features_arr = np.array(all_features, dtype=np.float32)
+            total = all_features_arr.sum()
+            if total == 0:
+                return all_features_arr
+            return all_features_arr / total
+
+        @property
+        def feature_importances_(self):
+            op = ModelApplyChunk(
+                func=self._calc_feature_importance, output_types=[OutputType.tensor]
+            )
+            params = {"shape": (self._n_features_in,), "dtype": np.dtype(np.float32)}
+            return op(
+                self.get_booster(),
+                [params],
+                importance_type=self.importance_type,
+                n_features=self._n_features_in,
+            )[0]
+
 
 def wrap_evaluation_matrices(
     missing: float,
     X: Any,
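Both get_score and feature_importances_ funnel through ModelApplyChunk, which ships the given static function to the stored booster and collects the result, while fetch rebuilds the matching local scikit-learn class by name. A rough sketch of how these surface to users (the booster and estimator variables are placeholders for objects produced by this package's train/fit calls):

    # booster-level scores, computed remotely
    scores = booster.get_score(importance_type="gain").execute()

    # estimator-level importances come back as a float32 tensor
    importances = model.feature_importances_.execute()

    # pull a plain xgboost.sklearn estimator back to the client
    local_model = model.fetch()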
maxframe/learn/contrib/xgboost/dmatrix.py CHANGED
@@ -141,8 +141,8 @@ def to_dmatrix(
         label_lower_bound=label_lower_bound,
         label_upper_bound=label_upper_bound,
         gpu=data.op.gpu,
-        enable_categorical=enable_categorical,
         _output_types=get_output_types(data),
+        enable_categorical=enable_categorical,
     )
     return op()
 
maxframe/learn/contrib/xgboost/predict.py CHANGED
@@ -26,7 +26,8 @@ from ....serialization.serializables import (
     TupleField,
 )
 from ....tensor.core import TensorOrder
-from .core import BoosterData
+from ..models import to_remote_model
+from .core import Booster, BoosterData
 from .dmatrix import check_data
 
 
@@ -96,11 +97,15 @@ def predict(
     -------
     results: Booster
     """
+    import xgboost
+
     data = check_data(data)
-    # TODO: check model datatype
+    if not isinstance(model, (Booster, BoosterData, xgboost.Booster)):
+        raise TypeError(f"model has to be a xgboost.Booster, got {type(model)} instead")
+    elif isinstance(model, xgboost.Booster):
+        model = to_remote_model(model, model_cls=Booster)
 
     output_types = [OutputType.tensor]
-
     iteration_range = iteration_range or (0, 0)
 
     return XGBPredict(
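predict now also accepts a client-side xgboost.Booster, wrapping it with to_remote_model on the fly; any other model type is rejected up front instead of failing later. A hedged sketch, assuming predict is importable from the package as in earlier releases (params, local_dtrain and df are placeholders):

    import xgboost
    from maxframe.learn.contrib.xgboost import predict

    local_bst = xgboost.train(params, local_dtrain, num_boost_round=10)
    pred = predict(local_bst, df)   # local booster is auto-wrapped remotely
    predict("not a model", df)      # raises TypeError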
maxframe/learn/contrib/xgboost/regressor.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Union
 
 from ..utils import make_import_error_func
 from .core import XGBScikitLearnBase, xgboost
@@ -19,15 +20,25 @@ from .core import XGBScikitLearnBase, xgboost
 if not xgboost:
     XGBRegressor = make_import_error_func("xgboost")
 else:
+    from xgboost.sklearn import XGBRegressorBase
+
     from .core import wrap_evaluation_matrices
     from .predict import predict
     from .train import train
 
-    class XGBRegressor(XGBScikitLearnBase):
+    class XGBRegressor(XGBScikitLearnBase, XGBRegressorBase):
        """
        Implementation of the scikit-learn API for XGBoost regressor.
        """
 
+        def __init__(
+            self,
+            xgb_model: Union[xgboost.XGBRegressor, xgboost.Booster] = None,
+            **kwargs,
+        ):
+            super().__init__(**kwargs)
+            self._set_model(xgb_model)
+
        def fit(
            self,
            X,
@@ -41,6 +52,9 @@ else:
         ):
             session = kw.pop("session", None)
             run_kwargs = kw.pop("run_kwargs", dict())
+
+            self._n_features_in = X.shape[1]
+
             dtrain, evals = wrap_evaluation_matrices(
                 None,
                 X,
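Mirroring the classifier, XGBRegressor gains the xgb_model parameter and now mixes in XGBRegressorBase, so it behaves like an upstream scikit-learn estimator. A short round-trip sketch with placeholder data names:

    from maxframe.learn.contrib.xgboost import XGBRegressor

    reg = XGBRegressor(n_estimators=50)
    reg.fit(X, y)                    # records _n_features_in = X.shape[1]
    local_reg = reg.fetch()          # plain xgboost.sklearn.XGBRegressor
    reg2 = XGBRegressor(xgb_model=local_reg)   # re-wrap the local model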
maxframe/learn/contrib/xgboost/train.py CHANGED
@@ -82,9 +82,8 @@ class XGBTrain(Operator, TileableOperatorMixin):
         inputs = [self.dtrain]
         if self.has_evals_result:
             inputs.extend(e[0] for e in self.evals)
-        return self.new_tileables(
-            inputs, object_class=Booster, evals_result=evals_result
-        )[0]
+        kws = [{"object_class": Booster}, {}]
+        return self.new_tileables(inputs, kws=kws, evals_result=evals_result)[0]
 
     @property
     def output_limit(self):
@@ -129,4 +128,6 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwarg
         num_class=num_class,
         **kwargs,
     )(evals_result)
-    return data.execute(session=session, **run_kwargs) if evals else data
+    if evals:
+        data.execute(session=session, **run_kwargs)
+    return data
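The train change is behavioral: the old code returned whatever data.execute(...) returned when evals were given; the new code executes only for the side effect (materializing evals_result) and always returns the Booster tileable. Sketched call pattern with placeholder matrices:

    history = {}
    booster = train(params, dtrain, evals=[(dval, "validation")], evals_result=history)
    # `booster` is the tileable in both branches; with evals it has already
    # been executed, so `history` is populated.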
maxframe/lib/dtypes_extension/__init__.py CHANGED
@@ -11,4 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from .dtypes import ArrowDtype, dict_, infer_arrow_dtype, is_map_dtype
+
+from .dtypes import ArrowDtype, dict_, is_list_dtype, is_map_dtype, list_
maxframe/lib/dtypes_extension/dtypes.py CHANGED
@@ -11,12 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from typing import Union
 
-import numpy as np
 import pandas as pd
 import pyarrow as pa
-from pandas.api.extensions import ExtensionDtype
 
 try:
     from pandas import ArrowDtype
@@ -38,6 +37,15 @@ def dict_(
     return pd.ArrowDtype(pa.map_(key_type, item_type))
 
 
+def list_(value_type: Union[pa.DataType, pa.Field]):
+    """
+    Create ``pd.ArrowDtype(pa.ListType)`` instance from a list or field.
+    """
+    if ArrowDtype is None:
+        raise ImportError("ArrowDtype is not supported in current environment")
+    return pd.ArrowDtype(pa.list_(value_type))
+
+
 def is_map_dtype(dtype: ArrowDtype) -> bool:
     """
     Check whether the dtype is a map type.
@@ -47,45 +55,12 @@ def is_map_dtype(dtype: ArrowDtype) -> bool:
     return isinstance(dtype, ArrowDtype) and isinstance(dtype.pyarrow_dtype, pa.MapType)
 
 
-_dtype_mapping = {
-    pd.Int8Dtype(): lambda x: pa.int8(),
-    pd.Int16Dtype(): lambda x: pa.int16(),
-    pd.Int32Dtype(): lambda x: pa.int32(),
-    pd.Int64Dtype(): lambda x: pa.int64(),
-    pd.UInt8Dtype(): lambda x: pa.uint8(),
-    pd.UInt16Dtype(): lambda x: pa.uint16(),
-    pd.UInt32Dtype(): lambda x: pa.uint32(),
-    pd.UInt64Dtype(): lambda x: pa.uint64(),
-    pd.BooleanDtype(): lambda x: pa.bool_(),
-    pd.Float32Dtype(): lambda x: pa.float32(),
-    pd.Float64Dtype(): lambda x: pa.float64(),
-    pd.StringDtype(): lambda x: pa.string(),
-}
-
-
-def infer_arrow_dtype(
-    dtype: Union[np.dtype, pa.DataType, ExtensionDtype]
-) -> Union[ArrowDtype, ExtensionDtype]:
+def is_list_dtype(dtype: ArrowDtype) -> bool:
     """
-    Convert any pandas accepted dtype to arrow type in a best effort way.
-
-    Parameters
-    ----------
-    dtype : Union[np.dtype, pa.DataType, ExtensionDtype]
-        The dtype instance, can be np.dtype, pa.DataType or ExtensionDtype
-
-    Returns
-    -------
-    Union[pd.ArrowDtype, ExtensionDtype]: The converted pd.ArrowDtype, or ExtensionDtype if conversion failed.
+    Check whether the dtype is a list dtype.
     """
-    if isinstance(dtype, ArrowDtype):
-        return dtype
-
-    if isinstance(dtype, np.dtype):
-        return ArrowDtype(pa.from_numpy_dtype(dtype))
-    if isinstance(dtype, pd.DatetimeTZDtype):
-        return pa.timestamp(dtype.unit, dtype.tz)
-
-    if dtype in _dtype_mapping:
-        return ArrowDtype(_dtype_mapping[dtype](dtype))
-    return dtype
+    if ArrowDtype is None:
+        raise ImportError("ArrowDtype is not supported in current environment")
+    return isinstance(dtype, ArrowDtype) and isinstance(
+        dtype.pyarrow_dtype, pa.ListType
+    )
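The new pair mirrors the existing dict_/is_map_dtype helpers for Arrow list types. For example, following the cases exercised by the updated tests below:

    import pandas as pd
    import pyarrow as pa
    from maxframe.lib.dtypes_extension import is_list_dtype, list_

    dt = list_(pa.int64())            # pd.ArrowDtype(pa.list_(pa.int64()))
    assert is_list_dtype(dt)
    assert not is_list_dtype(pd.ArrowDtype(pa.map_(pa.int64(), pa.string())))

    s = pd.Series([[1, 2], [3]], dtype=dt)   # usable as a pandas dtype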
maxframe/lib/dtypes_extension/tests/test_dtypes.py CHANGED
@@ -12,24 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import numpy as np
 import pandas as pd
 import pyarrow as pa
 import pytest
 
 from ....utils import ARROW_DTYPE_NOT_SUPPORTED
-from ..dtypes import dict_, infer_arrow_dtype, is_map_dtype
+from ..dtypes import dict_, is_list_dtype, is_map_dtype, list_
 
 try:
     from pandas import ArrowDtype
 except:
     ArrowDtype = None
 
-
-@pytest.mark.skipif(
+pytestmark = pytest.mark.skipif(
     ARROW_DTYPE_NOT_SUPPORTED,
     reason="pandas doesn't support ArrowDtype",
 )
+
+
 def test_map_dtype():
     dt = dict_(pa.int64(), pa.string())
     assert is_map_dtype(dt)
@@ -39,30 +39,10 @@ def test_map_dtype():
     assert not is_map_dtype(pd.Int64Dtype)
 
 
-@pytest.mark.skipif(
-    ARROW_DTYPE_NOT_SUPPORTED,
-    reason="pandas doesn't support ArrowDtype",
-)
-@pytest.mark.parametrize(
-    "input_dtype, expected_type, expected_pa_dtype",
-    [
-        (
-            ArrowDtype(pa.int64()) if ArrowDtype else None,
-            ArrowDtype,
-            pa.int64(),
-        ),  # pd.ArrowDtype
-        (np.dtype("int64"), ArrowDtype, pa.int64()),  # np.dtype
-        (pd.CategoricalDtype(), pd.CategoricalDtype, None),  # pa.DataType
-        (pd.Int64Dtype(), ArrowDtype, pa.int64()),  # pd.ExtensionDtype
-        (
-            pd.DatetimeTZDtype("ns", "Asia/Shanghai"),
-            pa.TimestampType,
-            pa.timestamp("ns", "Asia/Shanghai"),
-        ),
-    ],
-)
-def test_infer_arrow_dtype(input_dtype, expected_type, expected_pa_dtype):
-    result = infer_arrow_dtype(input_dtype)
-    assert isinstance(result, expected_type)
-    if expected_type == ArrowDtype:
-        assert result.pyarrow_dtype == expected_pa_dtype
+def test_list_dtype():
+    dt = list_(pa.int64())
+    assert is_list_dtype(dt)
+
+    dt = pd.ArrowDtype(pa.map_(pa.int64(), pa.string()))
+    assert not is_list_dtype(dt)
+    assert not is_list_dtype(pd.Int64Dtype)
maxframe/lib/mmh3.cp310-win32.pyd CHANGED (binary file)
maxframe/opcodes.py CHANGED
@@ -395,6 +395,11 @@ FUSE = 801
 # LLM
 DASHSCOPE_TEXT_GENERATION = 810
 DASHSCOPE_MULTI_MODAL_GENERATION = 811
+MANAGED_TEXT_MODAL_GENERATION = 812
+MANAGED_MULTI_MODAL_GENERATION = 813
+LLM_TEXT_SUMMARIZE_TASK = 814
+LLM_TEXT_TRANSLATE_TASK = 815
+LLM_TEXT_CLASSIFY_TASK = 816
 
 # table like input for tensor
 TABLE_COO = 1003
@@ -575,16 +580,30 @@ DATAFRAME_RESHUFFLE = 10001
 FLATMAP = 10002
 FLATJSON = 10003
 APPLY_CHUNK = 10004
+
 SERIES_DICT_GETITEM = 10005
 SERIES_DICT_SETITEM = 10006
 SERIES_DICT_LENGTH = 10007
 SERIES_DICT_REMOVE = 10008
 SERIES_DICT_CONTAINS = 10009
+SERIES_DICT_FLATTEN = 10010
+
+SERIES_LIST_GETITEM = 10020
+SERIES_LIST_SETITEM = 10021
+SERIES_LIST_CONTAINS = 10022
+SERIES_LIST_LENGTH = 10023
+SERIES_LIST_INSERT = 10024
+SERIES_LIST_EXTEND = 10025
+SERIES_LIST_POP = 10026
+SERIES_LIST_SORT = 10027
+SERIES_LIST_FLATTEN = 10028
 
 # MaxFrame internal operators
 DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
 GROUPBY_AGGR_SAME_INDEX_MERGE = 100002
 DATAFRAME_ILOC_GET_AND_RENAME_ITEM = 100003
+COLLECT_MODEL_RESULT = 100004
+MODEL_DATA_SOURCE = 100005
 
 # fetches
 FETCH_SHUFFLE = 999998
maxframe/serialization/__init__.py CHANGED
@@ -16,6 +16,7 @@ from . import arrow, exception, maxframe_objects, numpy, pandas, scipy, serializ
 from .core import (
     PickleContainer,
     Serializer,
+    clear_type_cache,
     deserialize,
     load_type,
     pickle_buffers,
maxframe/serialization/core.cp310-win32.pyd CHANGED (binary file)
maxframe/serialization/core.pyx CHANGED
@@ -112,7 +112,14 @@ cpdef object load_type(str class_name, object parent_class):
 
     mod_name, cls_name = class_name.rsplit("#", 1)
 
-    cls = importlib.import_module(mod_name)
+    try:
+        cls = importlib.import_module(mod_name)
+    except ImportError as ex:
+        raise ImportError(
+            f"Failed to import {mod_name} when loading "
+            f"class {class_name}, {ex}"
+        ) from None
+
     for sub_cls_name in cls_name.split("."):
         cls = getattr(cls, sub_cls_name)
     _type_cache[class_name] = cls
@@ -122,6 +129,10 @@ cpdef object load_type(str class_name, object parent_class):
     return cls
 
 
+cpdef void clear_type_cache():
+    _type_cache.clear()
+
+
 cdef Serializer get_deserializer(int32_t deserializer_id):
     return _deserializers[deserializer_id]
 
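load_type resolves strings of the form "module.path#Class.Inner" and memoizes the result in _type_cache; the new clear_type_cache lets tests drop stale entries when module-level classes are redefined, and the wrapped import error now names the module that failed. A rough sketch of the exposed behavior (the lookup string is illustrative, and the parent_class check is assumed permissive for object):

    from maxframe.serialization import clear_type_cache, load_type

    cls = load_type("collections#OrderedDict", object)   # imported, then cached
    clear_type_cache()                                   # forget memoized lookups
    load_type("no_such_module#Foo", object)   # ImportError naming no_such_module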
maxframe/serialization/numpy.py CHANGED
@@ -70,10 +70,18 @@ class NDArraySerializer(Serializer):
         if dtype_new_order:
             dtype = dtype[dtype_new_order]
         if dtype.hasobject:
-            shape = header["shape"]
-            # fill empty object array
-            val = np.empty(shape, dtype=dtype)
-            val[(slice(None),) * len(shape)] = subs[0]
+            shape = tuple(header["shape"])
+            if shape == ():
+                val = np.array(subs[0]).reshape(shape)
+            else:
+                # fill empty object array
+                val = np.empty(shape, dtype=dtype)
+                try:
+                    val[(slice(None),) * len(shape)] = subs[0]
+                except ValueError:
+                    val[(slice(None),) * len(shape)] = np.array(
+                        subs[0], dtype=dtype
+                    ).reshape(shape)
         else:
             val = np.ndarray(
                 shape=tuple(header["shape"]),
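The deserializer previously built an empty object array and broadcast the payload into it, which fails for 0-d arrays (and for payloads numpy refuses to broadcast element-wise); the new branches reshape explicitly instead. A round-trip sketch matching the new cases added to test_serial.py below:

    import numpy as np
    from maxframe.serialization import deserialize, serialize

    for val in (np.array(1234), np.array("str-val", dtype="O")):
        header, buffers = serialize(val)
        restored = deserialize(header, buffers)
        assert restored.shape == () and restored == val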
maxframe/serialization/serializables/tests/test_serializable.py CHANGED
@@ -22,7 +22,7 @@
 from ....core import EntityData
 from ....lib.wrapped_pickle import switch_unpickle
 from ....utils import no_default
-from ... import deserialize, serialize
+from ... import clear_type_cache, deserialize, serialize
 from .. import (
     AnyField,
     BoolField,
@@ -202,6 +202,7 @@ def test_serializable(set_is_ci):
 def test_compatible_serializable(set_is_ci):
     global MySimpleSerializable, MySubSerializable
 
+    clear_type_cache()
     old_base, old_sub = MySimpleSerializable, MySubSerializable
 
     try:
@@ -231,13 +232,23 @@
         my_sub_serializable2 = deserialize(header, buffers)
         assert type(my_sub_serializable) is not type(my_sub_serializable2)
         _assert_serializable_eq(my_sub_serializable, my_sub_serializable2)
+
+        header, buffers = serialize(my_sub_serializable2)
     finally:
         MySimpleSerializable, MySubSerializable = old_base, old_sub
+        MyMidSerializable = None
+        clear_type_cache()
+
+    my_sub_serializable3 = deserialize(header, buffers)
+    assert type(my_sub_serializable2) is not type(my_sub_serializable3)
+    _assert_serializable_eq(my_sub_serializable2, my_sub_serializable3)
 
 
 def _assert_serializable_eq(my_serializable, my_serializable2):
     for field_name, field in my_serializable._FIELDS.items():
-        if not hasattr(my_serializable, field.name):
+        if not hasattr(my_serializable, field.name) or not hasattr(
+            my_serializable2, field.name
+        ):
             continue
         expect_value = getattr(my_serializable, field_name)
         if expect_value is no_default:
maxframe/serialization/tests/test_serial.py CHANGED
@@ -162,6 +162,8 @@ def test_timezones(val):
 @pytest.mark.parametrize(
     "val",
     [
+        np.array(1234),
+        np.array("str-val", dtype="O"),
         np.array([1024])[0],
         np.array(np.random.rand(100, 100)),
         np.array(np.random.rand(100, 100).T),
maxframe/tensor/merge/concatenate.py CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import numpy as np
 
 from ... import opcodes