maxframe 1.2.1__cp39-cp39-win_amd64.whl → 1.3.0__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (70)
  1. maxframe/_utils.cp39-win_amd64.pyd +0 -0
  2. maxframe/codegen.py +70 -21
  3. maxframe/config/config.py +6 -0
  4. maxframe/core/accessor.py +1 -0
  5. maxframe/core/graph/core.cp39-win_amd64.pyd +0 -0
  6. maxframe/dataframe/accessors/__init__.py +1 -1
  7. maxframe/dataframe/accessors/dict_/accessor.py +1 -0
  8. maxframe/dataframe/accessors/dict_/length.py +1 -0
  9. maxframe/dataframe/accessors/dict_/setitem.py +1 -0
  10. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +5 -7
  11. maxframe/dataframe/accessors/list_/__init__.py +37 -0
  12. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  13. maxframe/dataframe/accessors/list_/getitem.py +135 -0
  14. maxframe/dataframe/accessors/list_/length.py +73 -0
  15. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  16. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +79 -0
  17. maxframe/dataframe/accessors/plotting/__init__.py +2 -0
  18. maxframe/dataframe/accessors/string_/__init__.py +1 -0
  19. maxframe/dataframe/datastore/to_odps.py +6 -0
  20. maxframe/dataframe/extensions/accessor.py +1 -0
  21. maxframe/dataframe/extensions/apply_chunk.py +34 -21
  22. maxframe/dataframe/extensions/flatmap.py +8 -1
  23. maxframe/dataframe/extensions/tests/test_apply_chunk.py +2 -1
  24. maxframe/dataframe/extensions/tests/test_extensions.py +1 -0
  25. maxframe/dataframe/merge/concat.py +7 -4
  26. maxframe/dataframe/merge/merge.py +1 -0
  27. maxframe/dataframe/merge/tests/test_merge.py +97 -47
  28. maxframe/dataframe/missing/tests/test_missing.py +1 -0
  29. maxframe/dataframe/tests/test_utils.py +7 -0
  30. maxframe/dataframe/ufunc/ufunc.py +1 -0
  31. maxframe/dataframe/utils.py +3 -0
  32. maxframe/io/odpsio/schema.py +1 -0
  33. maxframe/learn/contrib/__init__.py +2 -4
  34. maxframe/learn/contrib/llm/__init__.py +1 -0
  35. maxframe/learn/contrib/llm/core.py +31 -10
  36. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  37. maxframe/learn/contrib/llm/models/dashscope.py +4 -3
  38. maxframe/learn/contrib/llm/models/managed.py +39 -0
  39. maxframe/learn/contrib/llm/multi_modal.py +1 -0
  40. maxframe/learn/contrib/llm/text.py +252 -8
  41. maxframe/learn/contrib/models.py +77 -0
  42. maxframe/learn/contrib/utils.py +1 -0
  43. maxframe/learn/contrib/xgboost/__init__.py +8 -1
  44. maxframe/learn/contrib/xgboost/classifier.py +15 -4
  45. maxframe/learn/contrib/xgboost/core.py +108 -1
  46. maxframe/learn/contrib/xgboost/dmatrix.py +1 -1
  47. maxframe/learn/contrib/xgboost/predict.py +8 -3
  48. maxframe/learn/contrib/xgboost/regressor.py +15 -1
  49. maxframe/learn/contrib/xgboost/train.py +5 -4
  50. maxframe/lib/dtypes_extension/__init__.py +2 -1
  51. maxframe/lib/dtypes_extension/dtypes.py +21 -0
  52. maxframe/lib/dtypes_extension/tests/test_dtypes.py +13 -3
  53. maxframe/lib/mmh3.cp39-win_amd64.pyd +0 -0
  54. maxframe/opcodes.py +19 -0
  55. maxframe/serialization/__init__.py +1 -0
  56. maxframe/serialization/core.cp39-win_amd64.pyd +0 -0
  57. maxframe/serialization/core.pyx +12 -1
  58. maxframe/serialization/numpy.py +12 -4
  59. maxframe/serialization/serializables/tests/test_serializable.py +13 -2
  60. maxframe/serialization/tests/test_serial.py +2 -0
  61. maxframe/tensor/merge/concatenate.py +1 -0
  62. maxframe/tensor/misc/unique.py +11 -10
  63. maxframe/tensor/reshape/reshape.py +4 -1
  64. maxframe/utils.py +4 -0
  65. {maxframe-1.2.1.dist-info → maxframe-1.3.0.dist-info}/METADATA +2 -2
  66. {maxframe-1.2.1.dist-info → maxframe-1.3.0.dist-info}/RECORD +70 -62
  67. {maxframe-1.2.1.dist-info → maxframe-1.3.0.dist-info}/WHEEL +1 -1
  68. maxframe_client/session/odps.py +3 -0
  69. maxframe_client/session/tests/test_task.py +1 -0
  70. {maxframe-1.2.1.dist-info → maxframe-1.3.0.dist-info}/top_level.txt +0 -0
maxframe/learn/contrib/xgboost/core.py CHANGED
@@ -12,14 +12,21 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- from typing import Any, Callable, Dict, List, Optional, Tuple
+ import json
+ import os
+ import tempfile
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+
+ import numpy as np

  try:
      import xgboost
  except ImportError:
      xgboost = None

+ from ....core import OutputType
  from ...core import Model, ModelData
+ from ..models import ModelApplyChunk, to_remote_model
  from .dmatrix import DMatrix


@@ -32,6 +39,33 @@ class BoosterData(ModelData):
          super().__init__(*args, **kwargs)
          self._evals_result = evals_result if evals_result is not None else dict()

+     @staticmethod
+     def _get_booster_score(bst, fmap=None, importance_type="weight"):
+         if not fmap:
+             tmp_file_name = ""
+         else:
+             tmp_file = tempfile.NamedTemporaryFile(delete=False)
+             tmp_file.write(fmap)
+             tmp_file.close()
+             tmp_file_name = tmp_file.name
+
+         try:
+             return bst.get_score(fmap=tmp_file_name, importance_type=importance_type)
+         finally:
+             if tmp_file_name:
+                 os.unlink(tmp_file_name)
+
+     def get_score(self, fmap="", importance_type="weight"):
+         op = ModelApplyChunk(
+             func=self._get_booster_score, output_types=[OutputType.object]
+         )
+         if not fmap:
+             fmap_data = None
+         else:
+             with open(fmap, "rb") as fmap_file:
+                 fmap_data = fmap_file.read()
+         return op(self, [{}], fmap=fmap_data, importance_type=importance_type)[0]
+
      def execute(self, session=None, **kw):
          # The evals_result should be fetched when BoosterData.execute() is called.
          result = super().execute(session=session, **kw)
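
Note on the `BoosterData.get_score` addition above: `xgboost.Booster.get_score` only accepts a feature map as a file path, which is why the remote helper spills the raw fmap bytes into a temporary file before scoring. A minimal local sketch of the same pattern using plain xgboost (the function name and parameters here are illustrative, not part of the maxframe API):

```python
import os
import tempfile

import xgboost


def score_with_fmap_bytes(bst, fmap_bytes=None, importance_type="weight"):
    tmp_name = ""
    if fmap_bytes:
        # get_score() wants a path, not bytes, hence the temp file
        tmp = tempfile.NamedTemporaryFile(delete=False)
        tmp.write(fmap_bytes)
        tmp.close()
        tmp_name = tmp.name
    try:
        # an empty path makes xgboost fall back to its default feature names
        return bst.get_score(fmap=tmp_name, importance_type=importance_type)
    finally:
        if tmp_name:
            os.unlink(tmp_name)
```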
@@ -82,6 +116,30 @@ else:
          Base class for implementing scikit-learn interface
          """

+         def _set_model(
+             self, xgb_model: Union[xgboost.XGBModel, xgboost.Booster] = None
+         ):
+             booster = None
+             if isinstance(xgb_model, xgboost.XGBModel):
+                 booster = xgb_model.get_booster()
+             elif isinstance(xgb_model, xgboost.Booster):
+                 booster = xgb_model
+
+             if booster is not None:
+                 self._Booster = to_remote_model(booster, model_cls=Booster)
+
+         @classmethod
+         def _get_param_names(cls):
+             # make sure `xgb_model` not treated as a model param
+             names = super()._get_param_names()
+             if names:
+                 names = [p for p in names if p != "xgb_model"]
+             return names
+
+         def __repr__(self):
+             local_model = self.fetch()
+             return repr(local_model)
+
          def fit(
              self,
              X,
@@ -157,6 +215,55 @@ else:
              self._Booster.execute(session=session, **run_kwargs)
              return super().evals_result()

+         def execute(self, session=None, run_kwargs=None):
+             self._Booster.execute(session=session, run_kwargs=run_kwargs)
+             return self
+
+         def fetch(self, session=None, run_kwargs=None):
+             from xgboost import sklearn as xgb_sklearn
+
+             booster = self._Booster.fetch(session=session, run_kwargs=run_kwargs)
+             remote_bst, self._Booster = self._Booster, booster
+             try:
+                 local_cls = getattr(xgb_sklearn, type(self).__name__)
+                 local_model = local_cls(**self.get_params(deep=True))
+                 local_model._Booster = booster
+                 return local_model
+             finally:
+                 self._Booster = remote_bst
+
+         @staticmethod
+         def _calc_feature_importance(bst, importance_type, n_features):
+             config = json.loads(bst.save_config())
+             bst_type = config["learner"]["gradient_booster"]["name"]
+             dft = "weight" if bst_type == "gblinear" else "gain"
+             importance_type = importance_type or dft
+             score = bst.get_score(importance_type=importance_type)
+             if bst.feature_names is None:
+                 feature_names = [f"f{i}" for i in range(n_features)]
+             else:
+                 feature_names = bst.feature_names
+             # gblinear returns all features so the `get` in next line is only for gbtree.
+             all_features = [score.get(f, 0.0) for f in feature_names]
+             all_features_arr = np.array(all_features, dtype=np.float32)
+             total = all_features_arr.sum()
+             if total == 0:
+                 return all_features_arr
+             return all_features_arr / total
+
+         @property
+         def feature_importances_(self):
+             op = ModelApplyChunk(
+                 func=self._calc_feature_importance, output_types=[OutputType.tensor]
+             )
+             params = {"shape": (self._n_features_in,), "dtype": np.dtype(np.float32)}
+             return op(
+                 self.get_booster(),
+                 [params],
+                 importance_type=self.importance_type,
+                 n_features=self._n_features_in,
+             )[0]
+
      def wrap_evaluation_matrices(
          missing: float,
          X: Any,
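
The new `feature_importances_` property ships `_calc_feature_importance` to the remote side through `ModelApplyChunk`, but the math itself mirrors what xgboost's sklearn wrapper computes locally. A standalone sketch of just that computation, runnable against any trained `xgboost.Booster` (the function name is illustrative):

```python
import json

import numpy as np
import xgboost


def normalized_importance(bst, n_features, importance_type=None):
    # gblinear only supports "weight"; tree boosters default to "gain"
    kind = json.loads(bst.save_config())["learner"]["gradient_booster"]["name"]
    importance_type = importance_type or ("weight" if kind == "gblinear" else "gain")
    score = bst.get_score(importance_type=importance_type)
    names = bst.feature_names or [f"f{i}" for i in range(n_features)]
    # gbtree omits features never used in a split, so missing keys count as 0.0
    arr = np.array([score.get(f, 0.0) for f in names], dtype=np.float32)
    total = arr.sum()
    return arr if total == 0 else arr / total
```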
maxframe/learn/contrib/xgboost/dmatrix.py CHANGED
@@ -141,8 +141,8 @@ def to_dmatrix(
          label_lower_bound=label_lower_bound,
          label_upper_bound=label_upper_bound,
          gpu=data.op.gpu,
-         enable_categorical=enable_categorical,
          _output_types=get_output_types(data),
+         enable_categorical=enable_categorical,
      )
      return op()
maxframe/learn/contrib/xgboost/predict.py CHANGED
@@ -26,7 +26,8 @@ from ....serialization.serializables import (
      TupleField,
  )
  from ....tensor.core import TensorOrder
- from .core import BoosterData
+ from ..models import to_remote_model
+ from .core import Booster, BoosterData
  from .dmatrix import check_data


@@ -96,11 +97,15 @@ def predict(
      -------
      results: Booster
      """
+     import xgboost
+
      data = check_data(data)
-     # TODO: check model datatype
+     if not isinstance(model, (Booster, BoosterData, xgboost.Booster)):
+         raise TypeError(f"model has to be a xgboost.Booster, got {type(model)} instead")
+     elif isinstance(model, xgboost.Booster):
+         model = to_remote_model(model, model_cls=Booster)

      output_types = [OutputType.tensor]
-
      iteration_range = iteration_range or (0, 0)

      return XGBPredict(
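
With the check above, `predict` now accepts a locally trained `xgboost.Booster` directly, uploading it through `to_remote_model`, and fails fast with a `TypeError` on unsupported model types. A hedged usage sketch; the positional argument order and the `maxframe.tensor` input are assumptions, not taken from this diff:

```python
import numpy as np
import xgboost

import maxframe.tensor as mt
from maxframe.learn.contrib.xgboost import predict

# train an ordinary local booster on random data
X_local = np.random.rand(100, 4)
y_local = (X_local.sum(axis=1) > 2).astype(int)
local_bst = xgboost.train(
    {"objective": "binary:logistic"}, xgboost.DMatrix(X_local, label=y_local)
)

# the local Booster is converted with to_remote_model before prediction;
# passing e.g. a plain dict here would raise the new TypeError
result = predict(local_bst, mt.tensor(np.random.rand(10, 4)))
```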
maxframe/learn/contrib/xgboost/regressor.py CHANGED
@@ -12,6 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+ from typing import Union

  from ..utils import make_import_error_func
  from .core import XGBScikitLearnBase, xgboost
@@ -19,15 +20,25 @@ from .core import XGBScikitLearnBase, xgboost
  if not xgboost:
      XGBRegressor = make_import_error_func("xgboost")
  else:
+     from xgboost.sklearn import XGBRegressorBase
+
      from .core import wrap_evaluation_matrices
      from .predict import predict
      from .train import train

-     class XGBRegressor(XGBScikitLearnBase):
+     class XGBRegressor(XGBScikitLearnBase, XGBRegressorBase):
          """
          Implementation of the scikit-learn API for XGBoost regressor.
          """

+         def __init__(
+             self,
+             xgb_model: Union[xgboost.XGBRegressor, xgboost.Booster] = None,
+             **kwargs,
+         ):
+             super().__init__(**kwargs)
+             self._set_model(xgb_model)
+
          def fit(
              self,
              X,
@@ -41,6 +52,9 @@ else:
          ):
              session = kw.pop("session", None)
              run_kwargs = kw.pop("run_kwargs", dict())
+
+             self._n_features_in = X.shape[1]
+
              dtrain, evals = wrap_evaluation_matrices(
                  None,
                  X,
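
Together with `_set_model` in core.py, the new `xgb_model` argument lets a model trained entirely outside MaxFrame be wrapped for remote inference. A sketch of the intended call pattern, assuming `XGBRegressor` is re-exported from `maxframe.learn.contrib.xgboost`:

```python
import numpy as np
import xgboost

from maxframe.learn.contrib.xgboost import XGBRegressor

# an ordinary, locally fitted sklearn-style model
X = np.random.rand(200, 8)
y = X @ np.random.rand(8)
local = xgboost.XGBRegressor(n_estimators=10).fit(X, y)

# _set_model extracts the booster (get_booster() for an XGBModel, the
# object itself for a raw Booster) and uploads it via to_remote_model
remote = XGBRegressor(xgb_model=local)
```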
maxframe/learn/contrib/xgboost/train.py CHANGED
@@ -82,9 +82,8 @@ class XGBTrain(Operator, TileableOperatorMixin):
          inputs = [self.dtrain]
          if self.has_evals_result:
              inputs.extend(e[0] for e in self.evals)
-         return self.new_tileables(
-             inputs, object_class=Booster, evals_result=evals_result
-         )[0]
+         kws = [{"object_class": Booster}, {}]
+         return self.new_tileables(inputs, kws=kws, evals_result=evals_result)[0]

      @property
      def output_limit(self):
@@ -129,4 +128,6 @@ def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwarg
          num_class=num_class,
          **kwargs,
      )(evals_result)
-     return data.execute(session=session, **run_kwargs) if evals else data
+     if evals:
+         data.execute(session=session, **run_kwargs)
+     return data
maxframe/lib/dtypes_extension/__init__.py CHANGED
@@ -11,4 +11,5 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- from .dtypes import ArrowDtype, dict_, is_map_dtype
+
+ from .dtypes import ArrowDtype, dict_, is_list_dtype, is_map_dtype, list_
maxframe/lib/dtypes_extension/dtypes.py CHANGED
@@ -11,6 +11,7 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
  from typing import Union

  import pandas as pd
@@ -36,6 +37,15 @@ def dict_(
      return pd.ArrowDtype(pa.map_(key_type, item_type))


+ def list_(value_type: Union[pa.DataType, pa.Field]):
+     """
+     Create ``pd.ArrowDtype(pa.ListType)`` instance from a list or field.
+     """
+     if ArrowDtype is None:
+         raise ImportError("ArrowDtype is not supported in current environment")
+     return pd.ArrowDtype(pa.list_(value_type))
+
+
  def is_map_dtype(dtype: ArrowDtype) -> bool:
      """
      Check whether the dtype is a map type.
@@ -43,3 +53,14 @@ def is_map_dtype(dtype: ArrowDtype) -> bool:
      if ArrowDtype is None:
          raise ImportError("ArrowDtype is not supported in current environment")
      return isinstance(dtype, ArrowDtype) and isinstance(dtype.pyarrow_dtype, pa.MapType)
+
+
+ def is_list_dtype(dtype: ArrowDtype) -> bool:
+     """
+     Check whether the dtype is a list dtype.
+     """
+     if ArrowDtype is None:
+         raise ImportError("ArrowDtype is not supported in current environment")
+     return isinstance(dtype, ArrowDtype) and isinstance(
+         dtype.pyarrow_dtype, pa.ListType
+     )
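
These helpers are thin wrappers over public pandas/pyarrow APIs, so their behavior can be verified locally (requires a pandas build with `ArrowDtype` support, roughly pandas >= 2.0 with pyarrow installed):

```python
import pandas as pd
import pyarrow as pa

from maxframe.lib.dtypes_extension import is_list_dtype, list_

dt = list_(pa.int64())  # equivalent to pd.ArrowDtype(pa.list_(pa.int64()))
s = pd.Series([[1, 2], [3]], dtype=dt)

assert is_list_dtype(dt)
# map types and non-Arrow dtypes are rejected
assert not is_list_dtype(pd.ArrowDtype(pa.map_(pa.int64(), pa.string())))
assert not is_list_dtype(pd.Int64Dtype())
```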
maxframe/lib/dtypes_extension/tests/test_dtypes.py CHANGED
@@ -17,18 +17,19 @@ import pyarrow as pa
  import pytest

  from ....utils import ARROW_DTYPE_NOT_SUPPORTED
- from ..dtypes import dict_, is_map_dtype
+ from ..dtypes import dict_, is_list_dtype, is_map_dtype, list_

  try:
      from pandas import ArrowDtype
  except:
      ArrowDtype = None

-
- @pytest.mark.skipif(
+ pytestmark = pytest.mark.skipif(
      ARROW_DTYPE_NOT_SUPPORTED,
      reason="pandas doesn't support ArrowDtype",
  )
+
+
  def test_map_dtype():
      dt = dict_(pa.int64(), pa.string())
      assert is_map_dtype(dt)
@@ -36,3 +37,12 @@ def test_map_dtype():
      dt = pd.ArrowDtype(pa.list_(pa.int64()))
      assert not is_map_dtype(dt)
      assert not is_map_dtype(pd.Int64Dtype)
+
+
+ def test_list_dtype():
+     dt = list_(pa.int64())
+     assert is_list_dtype(dt)
+
+     dt = pd.ArrowDtype(pa.map_(pa.int64(), pa.string()))
+     assert not is_list_dtype(dt)
+     assert not is_list_dtype(pd.Int64Dtype)
maxframe/lib/mmh3.cp39-win_amd64.pyd CHANGED (binary file)

maxframe/opcodes.py CHANGED
@@ -395,6 +395,11 @@ FUSE = 801
  # LLM
  DASHSCOPE_TEXT_GENERATION = 810
  DASHSCOPE_MULTI_MODAL_GENERATION = 811
+ MANAGED_TEXT_MODAL_GENERATION = 812
+ MANAGED_MULTI_MODAL_GENERATION = 813
+ LLM_TEXT_SUMMARIZE_TASK = 814
+ LLM_TEXT_TRANSLATE_TASK = 815
+ LLM_TEXT_CLASSIFY_TASK = 816

  # table like input for tensor
  TABLE_COO = 1003
@@ -575,16 +580,30 @@ DATAFRAME_RESHUFFLE = 10001
  FLATMAP = 10002
  FLATJSON = 10003
  APPLY_CHUNK = 10004
+
  SERIES_DICT_GETITEM = 10005
  SERIES_DICT_SETITEM = 10006
  SERIES_DICT_LENGTH = 10007
  SERIES_DICT_REMOVE = 10008
  SERIES_DICT_CONTAINS = 10009
+ SERIES_DICT_FLATTEN = 10010
+
+ SERIES_LIST_GETITEM = 10020
+ SERIES_LIST_SETITEM = 10021
+ SERIES_LIST_CONTAINS = 10022
+ SERIES_LIST_LENGTH = 10023
+ SERIES_LIST_INSERT = 10024
+ SERIES_LIST_EXTEND = 10025
+ SERIES_LIST_POP = 10026
+ SERIES_LIST_SORT = 10027
+ SERIES_LIST_FLATTEN = 10028

  # MaxFrame internal operators
  DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
  GROUPBY_AGGR_SAME_INDEX_MERGE = 100002
  DATAFRAME_ILOC_GET_AND_RENAME_ITEM = 100003
+ COLLECT_MODEL_RESULT = 100004
+ MODEL_DATA_SOURCE = 100005

  # fetches
  FETCH_SHUFFLE = 999998
maxframe/serialization/__init__.py CHANGED
@@ -16,6 +16,7 @@ from . import arrow, exception, maxframe_objects, numpy, pandas, scipy, serializ
  from .core import (
      PickleContainer,
      Serializer,
+     clear_type_cache,
      deserialize,
      load_type,
      pickle_buffers,
maxframe/serialization/core.pyx CHANGED
@@ -112,7 +112,14 @@ cpdef object load_type(str class_name, object parent_class):

      mod_name, cls_name = class_name.rsplit("#", 1)

-     cls = importlib.import_module(mod_name)
+     try:
+         cls = importlib.import_module(mod_name)
+     except ImportError as ex:
+         raise ImportError(
+             f"Failed to import {mod_name} when loading "
+             f"class {class_name}, {ex}"
+         ) from None
+
      for sub_cls_name in cls_name.split("."):
          cls = getattr(cls, sub_cls_name)
      _type_cache[class_name] = cls
@@ -122,6 +129,10 @@
      return cls


+ cpdef void clear_type_cache():
+     _type_cache.clear()
+
+
  cdef Serializer get_deserializer(int32_t deserializer_id):
      return _deserializers[deserializer_id]
maxframe/serialization/numpy.py CHANGED
@@ -70,10 +70,18 @@ class NDArraySerializer(Serializer):
          if dtype_new_order:
              dtype = dtype[dtype_new_order]
          if dtype.hasobject:
-             shape = header["shape"]
-             # fill empty object array
-             val = np.empty(shape, dtype=dtype)
-             val[(slice(None),) * len(shape)] = subs[0]
+             shape = tuple(header["shape"])
+             if shape == ():
+                 val = np.array(subs[0]).reshape(shape)
+             else:
+                 # fill empty object array
+                 val = np.empty(shape, dtype=dtype)
+                 try:
+                     val[(slice(None),) * len(shape)] = subs[0]
+                 except ValueError:
+                     val[(slice(None),) * len(shape)] = np.array(
+                         subs[0], dtype=dtype
+                     ).reshape(shape)
          else:
              val = np.ndarray(
                  shape=tuple(header["shape"]),
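
The new branches make 0-d arrays deserialize correctly, which the two values added to test_serial.py below exercise. A quick round-trip check through the public entry points:

```python
import numpy as np

from maxframe.serialization import deserialize, serialize

# np.array("str-val", dtype="O") takes the new shape == () branch;
# np.array(1234) goes through the ordinary buffer-based path
for val in (np.array(1234), np.array("str-val", dtype="O")):
    header, buffers = serialize(val)
    restored = deserialize(header, buffers)
    assert restored.shape == ()
    assert restored == val
```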
maxframe/serialization/serializables/tests/test_serializable.py CHANGED
@@ -22,7 +22,7 @@ import pytest
  from ....core import EntityData
  from ....lib.wrapped_pickle import switch_unpickle
  from ....utils import no_default
- from ... import deserialize, serialize
+ from ... import clear_type_cache, deserialize, serialize
  from .. import (
      AnyField,
      BoolField,
@@ -202,6 +202,7 @@ def test_serializable(set_is_ci):
  def test_compatible_serializable(set_is_ci):
      global MySimpleSerializable, MySubSerializable

+     clear_type_cache()
      old_base, old_sub = MySimpleSerializable, MySubSerializable

      try:
@@ -231,13 +232,23 @@ def test_compatible_serializable(set_is_ci):
          my_sub_serializable2 = deserialize(header, buffers)
          assert type(my_sub_serializable) is not type(my_sub_serializable2)
          _assert_serializable_eq(my_sub_serializable, my_sub_serializable2)
+
+         header, buffers = serialize(my_sub_serializable2)
      finally:
          MySimpleSerializable, MySubSerializable = old_base, old_sub
+         MyMidSerializable = None
+         clear_type_cache()
+
+     my_sub_serializable3 = deserialize(header, buffers)
+     assert type(my_sub_serializable2) is not type(my_sub_serializable3)
+     _assert_serializable_eq(my_sub_serializable2, my_sub_serializable3)


  def _assert_serializable_eq(my_serializable, my_serializable2):
      for field_name, field in my_serializable._FIELDS.items():
-         if not hasattr(my_serializable, field.name):
+         if not hasattr(my_serializable, field.name) or not hasattr(
+             my_serializable2, field.name
+         ):
              continue
          expect_value = getattr(my_serializable, field_name)
          if expect_value is no_default:
maxframe/serialization/tests/test_serial.py CHANGED
@@ -162,6 +162,8 @@ def test_timezones(val):
  @pytest.mark.parametrize(
      "val",
      [
+         np.array(1234),
+         np.array("str-val", dtype="O"),
          np.array([1024])[0],
          np.array(np.random.rand(100, 100)),
          np.array(np.random.rand(100, 100).T),
maxframe/tensor/merge/concatenate.py CHANGED
@@ -11,6 +11,7 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
+
  import numpy as np

  from ... import opcodes
maxframe/tensor/misc/unique.py CHANGED
@@ -32,23 +32,24 @@ class TensorUnique(TensorHasInput, TensorOperatorMixin):

      @property
      def output_limit(self):
-         return 1
+         return 1 + self.return_index + self.return_inverse + self.return_counts

-     def _gen_kws(self, input_obj, chunk=False, chunk_index=None):
+     @classmethod
+     def _gen_kws(cls, op: "TensorUnique", input_obj, chunk=False, chunk_index=None):
          kws = []

          # unique tensor
          shape = list(input_obj.shape)
-         shape[self.axis] = np.nan
+         shape[op.axis] = np.nan
          kw = {"shape": tuple(shape), "dtype": input_obj.dtype, "gpu": input_obj.op.gpu}
          if chunk:
              idx = [0] * len(shape)
-             idx[self.axis] = chunk_index or 0
+             idx[op.axis] = chunk_index or 0
              kw["index"] = tuple(idx)
          kws.append(kw)

          # unique indices tensor
-         if self.return_index:
+         if op.return_index:
              kw = {
                  "shape": (np.nan,),
                  "dtype": np.dtype(np.intp),
@@ -60,9 +61,9 @@ class TensorUnique(TensorHasInput, TensorOperatorMixin):
              kws.append(kw)

          # unique inverse tensor
-         if self.return_inverse:
+         if op.return_inverse:
              kw = {
-                 "shape": (input_obj.shape[self.axis],),
+                 "shape": (input_obj.shape[op.axis],),
                  "dtype": np.dtype(np.intp),
                  "gpu": input_obj.op.gpu,
                  "type": "inverse",
@@ -72,7 +73,7 @@ class TensorUnique(TensorHasInput, TensorOperatorMixin):
              kws.append(kw)

          # unique counts tensor
-         if self.return_counts:
+         if op.return_counts:
              kw = {
                  "shape": (np.nan,),
                  "dtype": np.dtype(int),
@@ -92,9 +93,9 @@ class TensorUnique(TensorHasInput, TensorOperatorMixin):
          if self.axis is None:
              if ar.ndim > 1:
                  ar = ar.flatten()
-             self._axis = 0
+             self.axis = 0
          else:
-             self._axis = validate_axis(ar.ndim, self._axis)
+             self.axis = validate_axis(ar.ndim, self.axis)

          kws = self._gen_kws(self, ar)
          tensors = self.new_tensors([ar], kws=kws, order=TensorOrder.C_ORDER)
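
`output_limit` now counts every optional output, since `unique` can emit up to four tensors. A usage sketch, assuming `unique` is exposed on `maxframe.tensor` with the same flags as `numpy.unique`:

```python
import maxframe.tensor as mt

a = mt.tensor([1, 2, 2, 3, 3, 3])
# one extra output per flag: output_limit == 1 + 1 + 1 + 1 == 4 here
values, index, inverse, counts = mt.unique(
    a, return_index=True, return_inverse=True, return_counts=True
)
```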
maxframe/tensor/reshape/reshape.py CHANGED
@@ -181,7 +181,10 @@ def _reshape(a, newshape, order="C", tensor_order=None, out_shape=None):
      if tensor_order is None:
          tensor_order = get_order(order, a.order, available_options="CFA")
      op = TensorReshape(
-         newshape, order, dtype=a.dtype, create_view=tensor_order == a.order
+         newshape=newshape,
+         order=order,
+         dtype=a.dtype,
+         create_view=tensor_order == a.order,
      )
      if out_shape is None:
          out_shape = newshape
maxframe/utils.py CHANGED
@@ -1140,3 +1140,7 @@ def is_empty(val):
      if isinstance(val, (pd.DataFrame, pd.Series, pd.Index)):
          return val.empty
      return not bool(val)
+
+
+ def get_default_table_properties():
+     return {"storagestrategy": "archive"}
{maxframe-1.2.1.dist-info → maxframe-1.3.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: maxframe
- Version: 1.2.1
+ Version: 1.3.0
  Summary: MaxFrame operator-based data analyze framework
  Requires-Dist: numpy<2.0.0,>=1.19.0
  Requires-Dist: pandas>=1.0.0