maxframe-2.2.0-cp38-cp38-win_amd64.whl → maxframe-2.3.0rc1-cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of maxframe might be problematic.

Files changed (114)
  1. maxframe/_utils.cp38-win_amd64.pyd +0 -0
  2. maxframe/codegen/core.py +3 -2
  3. maxframe/codegen/spe/dataframe/merge.py +4 -0
  4. maxframe/codegen/spe/dataframe/misc.py +2 -0
  5. maxframe/codegen/spe/dataframe/reduction.py +18 -0
  6. maxframe/codegen/spe/dataframe/sort.py +9 -1
  7. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  8. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  9. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  10. maxframe/codegen/spe/tensor/datasource.py +1 -0
  11. maxframe/config/config.py +3 -0
  12. maxframe/conftest.py +10 -0
  13. maxframe/core/base.py +2 -1
  14. maxframe/core/entity/tileables.py +2 -0
  15. maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
  16. maxframe/core/graph/entity.py +7 -1
  17. maxframe/core/mode.py +6 -1
  18. maxframe/dataframe/__init__.py +2 -2
  19. maxframe/dataframe/arithmetic/__init__.py +4 -0
  20. maxframe/dataframe/arithmetic/maximum.py +33 -0
  21. maxframe/dataframe/arithmetic/minimum.py +33 -0
  22. maxframe/dataframe/core.py +98 -106
  23. maxframe/dataframe/datasource/core.py +6 -0
  24. maxframe/dataframe/datasource/direct.py +57 -0
  25. maxframe/dataframe/datasource/read_csv.py +19 -11
  26. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  27. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  28. maxframe/dataframe/datasource/read_parquet.py +38 -39
  29. maxframe/dataframe/datastore/__init__.py +6 -0
  30. maxframe/dataframe/datastore/direct.py +268 -0
  31. maxframe/dataframe/datastore/to_odps.py +6 -0
  32. maxframe/dataframe/extensions/flatjson.py +2 -1
  33. maxframe/dataframe/groupby/__init__.py +5 -1
  34. maxframe/dataframe/groupby/aggregation.py +10 -6
  35. maxframe/dataframe/groupby/apply_chunk.py +1 -3
  36. maxframe/dataframe/groupby/core.py +20 -4
  37. maxframe/dataframe/indexing/__init__.py +2 -1
  38. maxframe/dataframe/indexing/insert.py +45 -17
  39. maxframe/dataframe/merge/__init__.py +3 -0
  40. maxframe/dataframe/merge/combine.py +244 -0
  41. maxframe/dataframe/misc/__init__.py +14 -3
  42. maxframe/dataframe/misc/check_unique.py +41 -10
  43. maxframe/dataframe/misc/drop.py +31 -0
  44. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  45. maxframe/dataframe/misc/map.py +31 -18
  46. maxframe/dataframe/misc/repeat.py +159 -0
  47. maxframe/dataframe/misc/tests/test_misc.py +35 -1
  48. maxframe/dataframe/missing/checkna.py +3 -2
  49. maxframe/dataframe/reduction/__init__.py +10 -5
  50. maxframe/dataframe/reduction/aggregation.py +6 -6
  51. maxframe/dataframe/reduction/argmax.py +7 -4
  52. maxframe/dataframe/reduction/argmin.py +7 -4
  53. maxframe/dataframe/reduction/core.py +18 -9
  54. maxframe/dataframe/reduction/mode.py +144 -0
  55. maxframe/dataframe/reduction/nunique.py +10 -3
  56. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  57. maxframe/dataframe/sort/__init__.py +9 -2
  58. maxframe/dataframe/sort/argsort.py +7 -1
  59. maxframe/dataframe/sort/core.py +1 -1
  60. maxframe/dataframe/sort/rank.py +147 -0
  61. maxframe/dataframe/tseries/__init__.py +19 -0
  62. maxframe/dataframe/tseries/at_time.py +61 -0
  63. maxframe/dataframe/tseries/between_time.py +122 -0
  64. maxframe/dataframe/utils.py +30 -26
  65. maxframe/learn/contrib/llm/core.py +16 -7
  66. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  67. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  68. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  69. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  70. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  71. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  72. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  73. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  74. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  75. maxframe/learn/contrib/llm/models/managed.py +76 -11
  76. maxframe/learn/contrib/llm/models/openai.py +72 -0
  77. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  78. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  79. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  80. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  81. maxframe/learn/contrib/llm/text.py +348 -42
  82. maxframe/learn/contrib/models.py +4 -1
  83. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  84. maxframe/learn/contrib/xgboost/core.py +31 -7
  85. maxframe/learn/contrib/xgboost/predict.py +4 -2
  86. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  87. maxframe/learn/contrib/xgboost/train.py +2 -0
  88. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  89. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  90. maxframe/learn/utils/__init__.py +1 -0
  91. maxframe/learn/utils/extmath.py +42 -9
  92. maxframe/learn/utils/odpsio.py +80 -11
  93. maxframe/lib/filesystem/_oss_lib/common.py +2 -0
  94. maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
  95. maxframe/opcodes.py +9 -1
  96. maxframe/remote/core.py +4 -0
  97. maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
  98. maxframe/serialization/tests/test_serial.py +2 -2
  99. maxframe/tensor/arithmetic/__init__.py +1 -1
  100. maxframe/tensor/arithmetic/core.py +2 -2
  101. maxframe/tensor/arithmetic/tests/test_arithmetic.py +0 -9
  102. maxframe/tensor/core.py +3 -0
  103. maxframe/tensor/misc/copyto.py +1 -1
  104. maxframe/tests/test_udf.py +61 -0
  105. maxframe/tests/test_utils.py +8 -5
  106. maxframe/udf.py +103 -7
  107. maxframe/utils.py +61 -8
  108. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +1 -2
  109. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +113 -90
  110. maxframe_client/session/task.py +8 -1
  111. maxframe_client/tests/test_session.py +24 -0
  112. maxframe/dataframe/arrays.py +0 -864
  113. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  114. {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
maxframe/learn/contrib/xgboost/regressor.py CHANGED
@@ -14,6 +14,7 @@
 
 from typing import Union
 
+from ...utils.odpsio import register_odps_model
 from ..utils import make_import_error_func
 from .core import XGBScikitLearnBase, xgboost
 
@@ -24,6 +25,7 @@ else:
 
     from .predict import predict
 
+    @register_odps_model
     class XGBRegressor(XGBScikitLearnBase, XGBRegressorBase):
         """
         Implementation of the scikit-learn API for XGBoost regressor.
@@ -69,6 +71,9 @@ else:
             A list of the form [L_1, L_2, ..., L_n], where each L_i is a list
             of group weights on the i-th validation set.
             """
+            if y.ndim == 2:
+                kw["num_class"] = y.shape[1]
+                kw["output_ndim"] = 2
             super().fit(
                 X,
                 y,
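Note: the new `y.ndim == 2` branch above is what lets a multi-column target reach the trainer. A hypothetical usage sketch, not taken from this diff — the import paths mirror Mars conventions and a configured MaxFrame session is assumed:

import pandas as pd
import maxframe.dataframe as md
from maxframe.learn.contrib.xgboost import XGBRegressor

X = md.DataFrame(pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]}))
# a two-column target frame has ndim == 2, so fit() forwards
# num_class=2 and output_ndim=2 to the underlying XGBTrain operator
y = md.DataFrame(pd.DataFrame({"t1": [0.1, 0.2, 0.3], "t2": [1.0, 2.0, 3.0]}))

reg = XGBRegressor(n_estimators=5)
reg.fit(X, y)
pred = reg.predict(X).execute()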
maxframe/learn/contrib/xgboost/train.py CHANGED
@@ -25,6 +25,7 @@ from ....serialization.serializables import (
     DictField,
     FieldTypes,
     FunctionField,
+    Int16Field,
     Int64Field,
     KeyField,
     ListField,
@@ -65,6 +66,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
    num_boost_round = Int64Field("num_boost_round", default=10)
    num_class = Int64Field("num_class", default=None)
    _has_evals_result = BoolField("has_evals_result", default=False)
+    output_ndim = Int16Field("output_ndim", default=None)
 
    def __init__(self, gpu=None, **kw):
        if kw.get("evals_result") is not None:
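`Int16Field` is one of the serializable field descriptors, so the new `output_ndim` attribute round-trips with the operator like its siblings. A minimal sketch, assuming the `Serializable` base accepts field keyword arguments as it does in Mars:

from maxframe.serialization.serializables import Int16Field, Serializable

class _Demo(Serializable):
    # declared exactly like output_ndim on XGBTrain above
    output_ndim = Int16Field("output_ndim", default=None)

d = _Demo(output_ndim=2)
assert d.output_ndim == 2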
maxframe/learn/preprocessing/_data/min_max_scaler.py CHANGED
@@ -106,10 +106,11 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
     <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
     """
 
-    def __init__(self, feature_range=(0, 1), copy=True, clip=False):
+    def __init__(self, feature_range=(0, 1), copy=True, clip=False, validate=True):
         self.feature_range = feature_range
         self.copy = copy
         self.clip = clip
+        self.validate = validate
 
     def _reset(self):  # pragma: no cover
         """Reset internal data-dependent state of the scaler, if necessary.
@@ -186,13 +187,14 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
         )
 
         first_pass = not hasattr(self, "n_samples_seen_")
-        X = self._validate_data(
-            X,
-            reset=first_pass,
-            estimator=self,
-            dtype=FLOAT_DTYPES,
-            force_all_finite="allow-nan",
-        )
+        if self.validate:
+            X = self._validate_data(
+                X,
+                reset=first_pass,
+                estimator=self,
+                dtype=FLOAT_DTYPES,
+                force_all_finite="allow-nan",
+            )
 
         if isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE, INDEX_TYPE)):
             data_min = X.min(axis=0)
@@ -239,13 +241,14 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
         """
         check_is_fitted(self)
 
-        X = self._validate_data(
-            X,
-            copy=self.copy,
-            dtype=FLOAT_DTYPES,
-            force_all_finite="allow-nan",
-            reset=False,
-        )
+        if self.validate:
+            X = self._validate_data(
+                X,
+                copy=self.copy,
+                dtype=FLOAT_DTYPES,
+                force_all_finite="allow-nan",
+                reset=False,
+            )
 
         X *= self.scale_
         X += self.min_
@@ -290,6 +293,7 @@ def minmax_scale(
     *,
     axis=0,
     copy=True,
+    validate=True,
     execute=False,
     session=None,
     run_kwargs=None
@@ -368,21 +372,28 @@ def minmax_scale(
     """  # noqa
     # Unlike the scaler object, this function allows 1d input.
     # If copy is required, it will be done inside the scaler object.
-    X = check_array(
-        X, copy=False, ensure_2d=False, dtype=FLOAT_DTYPES, force_all_finite="allow-nan"
-    )
-    original_ndim = X.ndim
+    if validate:
+        X = check_array(
+            X,
+            copy=False,
+            ensure_2d=False,
+            dtype=FLOAT_DTYPES,
+            force_all_finite="allow-nan",
+        )
+        original_ndim = X.ndim
 
-    if original_ndim == 1:
-        X = X.reshape(X.shape[0], 1)
+        if original_ndim == 1:
+            X = X.reshape(X.shape[0], 1)
+    else:
+        original_ndim = X.ndim
 
-    s = MinMaxScaler(feature_range=feature_range, copy=copy)
+    s = MinMaxScaler(feature_range=feature_range, copy=copy, validate=validate)
     if axis == 0:
         X = s.fit_transform(X)
     else:
         X = s.fit_transform(X.T).T
 
-    if original_ndim == 1:
+    if validate and original_ndim == 1:
         X = X.ravel()
 
     if not execute:
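Taken together, these hunks thread a `validate` switch through `MinMaxScaler` and `minmax_scale` so validation (and the ndarray coercion it implies) can be skipped, letting the `isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE, INDEX_TYPE))` branch handle frame inputs directly. A hypothetical sketch — the import path and session setup are assumptions:

import pandas as pd
import maxframe.dataframe as md
from maxframe.learn.preprocessing import minmax_scale

df = md.DataFrame(pd.DataFrame({"a": [1.0, 2.0, 3.0], "b": [10.0, 20.0, 30.0]}))
# validate=False bypasses check_array/_validate_data, so the frame is
# scaled via X.min(axis=0)/X.max(axis=0) without conversion to a tensor
scaled = minmax_scale(df, validate=False)
print(scaled.execute())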
maxframe/learn/preprocessing/_data/standard_scaler.py CHANGED
@@ -156,10 +156,11 @@ class StandardScaler(TransformerMixin, BaseEstimator):
     [[3. 3.]]
     """
 
-    def __init__(self, *, copy=True, with_mean=True, with_std=True):
+    def __init__(self, *, copy=True, with_mean=True, with_std=True, validate=True):
         self.with_mean = with_mean
         self.with_std = with_std
         self.copy = copy
+        self.validate = validate
 
     def _reset(self):
         """Reset internal data-dependent state of the scaler, if necessary.
@@ -246,14 +247,15 @@ class StandardScaler(TransformerMixin, BaseEstimator):
         Fitted scaler.
         """
         first_call = not hasattr(self, "n_samples_seen_")
-        X = self._validate_data(
-            X,
-            accept_sparse=("csr", "csc"),
-            dtype=FLOAT_DTYPES,
-            force_all_finite="allow-nan",
-            reset=first_call,
-        )
-        n_features = X.shape[1]
+        if self.validate:
+            X = self._validate_data(
+                X,
+                accept_sparse=("csr", "csc"),
+                dtype=FLOAT_DTYPES,
+                force_all_finite="allow-nan",
+                reset=first_call,
+            )
+        n_features = X.shape[1] if X.ndim == 2 else 1
 
         if sample_weight is not None:
             sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
@@ -267,7 +269,9 @@ class StandardScaler(TransformerMixin, BaseEstimator):
         # incr_mean_variance_axis and _incremental_variance_axis
         dtype = np.int64 if sample_weight is None else X.dtype
         if not hasattr(self, "n_samples_seen_"):
-            self.n_samples_seen_ = mt.zeros(n_features, dtype=dtype)
+            self.n_samples_seen_ = (
+                mt.zeros(n_features, dtype=dtype) if X.ndim == 2 else 0
+            )
         # elif np.size(self.n_samples_seen_) == 1:
         #     self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1])
         #     self.n_samples_seen_ = self.n_samples_seen_.astype(dtype, copy=False)
@@ -309,9 +313,11 @@ class StandardScaler(TransformerMixin, BaseEstimator):
             constant_mask = _is_constant_feature(
                 self.var_, self.mean_, self.n_samples_seen_
             )
-            self.scale_ = _handle_zeros_in_scale(
-                mt.sqrt(self.var_), copy=False, constant_mask=constant_mask
-            )
+            self.scale_ = mt.sqrt(self.var_)
+            if self.validate:
+                self.scale_ = _handle_zeros_in_scale(
+                    self.scale_, copy=False, constant_mask=constant_mask
+                )
         else:
             self.scale_ = None
 
@@ -337,14 +343,15 @@ class StandardScaler(TransformerMixin, BaseEstimator):
         check_is_fitted(self)
 
         copy = copy if copy is not None else self.copy
-        X = self._validate_data(
-            X,
-            reset=False,
-            accept_sparse="csr",
-            copy=copy,
-            dtype=FLOAT_DTYPES,
-            force_all_finite="allow-nan",
-        )
+        if self.validate:
+            X = self._validate_data(
+                X,
+                reset=False,
+                accept_sparse="csr",
+                copy=copy,
+                dtype=FLOAT_DTYPES,
+                force_all_finite="allow-nan",
+            )
 
         if sparse.issparse(X):
             raise NotImplementedError("Scaling on sparse tensors is not supported")
@@ -397,7 +404,7 @@ class StandardScaler(TransformerMixin, BaseEstimator):
         return X
 
 
-def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
+def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True, validate=True):
     """Standardize a dataset along any axis.
 
     Center to the mean and component wise scale to unit variance.
@@ -488,16 +495,18 @@ def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
         X = mt.tensor(X)
 
     ndim = X.ndim
-    if ndim == 1:
+    if validate and ndim == 1:
         X = X.reshape((X.shape[0], 1))
     if axis == 1:
         X = X.T
 
-    scaler = StandardScaler(with_mean=with_mean, with_std=with_std, copy=copy)
+    scaler = StandardScaler(
+        with_mean=with_mean, with_std=with_std, copy=copy, validate=validate
+    )
    transformed = scaler.fit_transform(X)
 
    if axis == 1:
        transformed = transformed.T
-    if ndim == 1:
+    if validate and ndim == 1:
        transformed = transformed.reshape(transformed.shape[0])
    return transformed
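`StandardScaler` gets the same `validate` switch, plus tolerance for non-2-D input (`n_features` falls back to 1 and `n_samples_seen_` to a plain 0 when `X.ndim != 2`). A hypothetical sketch along the same lines — whether a 1-D Series flows end to end depends on the frame branches added in extmath.py below, so treat this as illustrative only:

import pandas as pd
import maxframe.dataframe as md
from maxframe.learn.preprocessing import StandardScaler

s = md.Series(pd.Series([1.0, 2.0, 3.0, 4.0]))
scaler = StandardScaler(validate=False)  # skip _validate_data entirely
standardized = scaler.fit_transform(s)   # 1-D input stays 1-D
print(standardized.execute())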
maxframe/learn/utils/__init__.py CHANGED
@@ -14,6 +14,7 @@
 
 from .core import convert_to_tensor_or_dataframe
 from .multiclass import check_classification_targets
+from .odpsio import read_odps_model
 from .shuffle import shuffle
 from .sparsefuncs import count_nonzero
 from .validation import check_array, check_consistent_length
maxframe/learn/utils/extmath.py CHANGED
@@ -15,6 +15,9 @@
 import numpy as np
 
 from ... import tensor as mt
+from ...core import ENTITY_TYPE
+from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
+from ...tensor.datasource import TensorZeros
 
 
 # Use at least float64 for the accumulating functions to avoid precision issue
@@ -42,7 +45,11 @@ def _safe_accumulator_op(op, x, *args, **kwargs):
     -------
     result : The output of the accumulator function passed to this function
     """
-    if np.issubdtype(x.dtype, np.floating) and x.dtype.itemsize < 8:
+    if (
+        hasattr(x, "dtype")
+        and np.issubdtype(x.dtype, np.floating)
+        and x.dtype.itemsize < 8
+    ):
         result = op(x, *args, **kwargs, dtype=np.float64)
     else:
         result = op(x, *args, **kwargs)
@@ -117,16 +124,31 @@ def _incremental_mean_and_var(
     `utils.sparsefuncs.incr_mean_variance_axis` and
     `utils.sparsefuncs_fast.incr_mean_variance_axis0`
     """
+    has_last_sample = isinstance(last_sample_count, ENTITY_TYPE) and not isinstance(
+        last_sample_count.op, TensorZeros
+    )
+    is_df_type = isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE))
+
     # old = stats until now
     # new = the current increment
     # updated = the aggregated stats
-    last_sum = last_mean * last_sample_count
+    last_sum = last_mean * last_sample_count if has_last_sample else 0
     X_nan_mask = mt.isnan(X)
     # if mt.any(X_nan_mask):
     #     sum_op = mt.nansum
     # else:
     #     sum_op = mt.sum
-    sum_op = mt.nansum
+
+    def df_sum(val, **kw):
+        if "dtype" in kw:
+            val = val.astype(kw.pop("dtype"))
+        return val.sum(**kw)
+
+    if is_df_type:
+        sum_op = df_sum
+    else:
+        sum_op = mt.nansum
+
     if sample_weight is not None:
         # equivalent to np.nansum(X * sample_weight, axis=0)
         # safer because np.float64(X*W) != np.float64(X)*np.float64(W)
@@ -138,10 +160,16 @@ def _incremental_mean_and_var(
         )
     else:
         new_sum = _safe_accumulator_op(sum_op, X, axis=0)
-        n_samples = X.shape[0]
-        new_sample_count = n_samples - mt.sum(X_nan_mask, axis=0)
+        if is_df_type:
+            new_sample_count = X.count()
+        else:
+            n_samples = X.shape[0]
+            new_sample_count = n_samples - mt.sum(X_nan_mask, axis=0)
 
-    updated_sample_count = last_sample_count + new_sample_count
+    if not has_last_sample:
+        updated_sample_count = new_sample_count
+    else:
+        updated_sample_count = last_sample_count + new_sample_count
 
     updated_mean = (last_sum + new_sum) / updated_sample_count
 
@@ -170,7 +198,9 @@ def _incremental_mean_and_var(
         # and recommendations", by Chan, Golub, and LeVeque.
         new_unnormalized_variance -= correction**2 / new_sample_count
 
-    last_unnormalized_variance = last_variance * last_sample_count
+    last_unnormalized_variance = (
+        last_variance * last_sample_count if has_last_sample else 0
+    )
 
     with mt.errstate(divide="ignore", invalid="ignore"):
         last_over_new_count = last_sample_count / new_sample_count
@@ -182,8 +212,11 @@ def _incremental_mean_and_var(
             * (last_sum / last_over_new_count - new_sum) ** 2
         )
 
-        zeros = last_sample_count == 0
-        updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
+        if not has_last_sample:
+            updated_unnormalized_variance = new_unnormalized_variance
+        else:
+            zeros = last_sample_count == 0
+            updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
     updated_variance = updated_unnormalized_variance / updated_sample_count
 
     return updated_mean, updated_variance, updated_sample_count
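The `has_last_sample` guard skips the merge arithmetic on the first batch; when a prior batch exists, the function applies the pairwise mean/variance merge of Chan, Golub, and LeVeque cited in the comments. A plain-NumPy illustration of that merge (not the MaxFrame code path):

import numpy as np

x1 = np.array([1.0, 2.0, 3.0])   # "old" batch: stats until now
x2 = np.array([4.0, 5.0])        # "new" batch: the current increment

n1, n2 = len(x1), len(x2)
m1, m2 = x1.mean(), x2.mean()
v1, v2 = x1.var(), x2.var()

n = n1 + n2
mean = (n1 * m1 + n2 * m2) / n
# Chan/Golub/LeVeque merge of the unnormalized variances
var = (n1 * v1 + n2 * v2 + n1 * n2 / n * (m1 - m2) ** 2) / n

assert np.isclose(mean, np.concatenate([x1, x2]).mean())
assert np.isclose(var, np.concatenate([x1, x2]).var())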
maxframe/learn/utils/odpsio.py CHANGED
@@ -12,7 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Dict, List, NamedTuple, Optional
+from typing import Any, Dict, List, NamedTuple, Optional, Set
+
+from odps import ODPS
 
 from ... import opcodes
 from ...core import ENTITY_TYPE, EntityData, OutputType
@@ -27,6 +29,36 @@ from ...serialization.serializables import (
 from ...utils import find_objects, replace_objects
 from ..core import LearnOperatorMixin
 
+_odps_model_classes: Set["ODPSModelMixin"] = set()
+
+
+def register_odps_model(model_cls: "ODPSModelMixin"):
+    _odps_model_classes.add(model_cls)
+    return model_cls
+
+
+class ReadODPSModel(ObjectOperator, LearnOperatorMixin):
+    _op_type_ = opcodes.READ_ODPS_MODEL
+
+    model_name = StringField("model_name", default=None)
+    model_version = StringField("model_version", default=None)
+    format = StringField("format", default=None)
+    location = StringField("location", default=None)
+    storage_options = DictField("storage_options", default=None)
+
+    def has_custom_code(self) -> bool:
+        return True
+
+    def __call__(self):
+        if not self.format.startswith("BOOSTED_TREE_"):
+            # todo support more model formats
+            raise ValueError("Only support boosted tree format")
+        for model_cls in _odps_model_classes:
+            ret = model_cls._build_odps_source_model(self)
+            if ret is not None:
+                return ret
+        raise ValueError(f"Model {self.model_name} not supported")
+
 
 class ToODPSModel(ObjectOperator, LearnOperatorMixin):
     _op_type_ = opcodes.TO_ODPS_MODEL
@@ -74,17 +106,21 @@ class ToODPSModel(ObjectOperator, LearnOperatorMixin):
         return self.new_tileable(inputs, shape=())
 
 
-class ToODPSModelMixin:
+class ODPSModelMixin:
     class ODPSModelInfo(NamedTuple):
         model_format: str
         model_params: Any
 
+    @classmethod
+    def _build_odps_source_model(cls, op: ReadODPSModel) -> Any:
+        return None
+
     def _get_odps_model_info(self) -> ODPSModelInfo:
         raise NotImplementedError
 
     def to_odps_model(
         self,
-        model_name: str = None,
+        model_name: str,
         model_version: str = None,
         schema: str = None,
         project: str = None,
@@ -167,14 +203,7 @@ class ToODPSModelMixin:
         ...     "role_arn": "acs:ram::<user_id>:role/aliyunodpsdefaultrole"
         ... }).execute()
         """
-        if "." not in model_name:
-            if project and not schema:
-                schema = "default"
-            if schema:
-                model_name = f"{schema}.{model_name}"
-            if project:
-                model_name = f"{project}.{model_name}"
-
+        model_name = _build_odps_model_name(model_name, schema, project)
         model_info = self._get_odps_model_info()
 
         op = ToODPSModel(
@@ -191,3 +220,43 @@ class ToODPSModelMixin:
             storage_options=storage_options,
         )
         return op(getattr(self, "training_info_"), model_info.model_params)
+
+
+def _build_odps_model_name(model_name: str, schema: str, project: str = None):
+    if "." not in model_name:
+        if project and not schema:
+            schema = "default"
+        if schema:
+            model_name = f"{schema}.{model_name}"
+        if project:
+            model_name = f"{project}.{model_name}"
+    return model_name
+
+
+def read_odps_model(
+    model_name: str,
+    schema: str = None,
+    project: str = None,
+    model_version: str = None,
+    odps_entry: ODPS = None,
+):
+    odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
+    if not hasattr(odps_entry, "get_model"):
+        raise RuntimeError("Need to install pyodps>=0.11.5 to use read_odps_model")
+
+    model_obj = odps_entry.get_model(model_name, project, schema)
+    if model_version:
+        model_obj = model_obj.versions[model_version]
+    # check if model exists
+    model_obj.reload()
+
+    full_model_name = _build_odps_model_name(model_name, schema, project)
+    location = model_obj.path
+    format = model_obj.type.value
+    op = ReadODPSModel(
+        model_name=full_model_name,
+        model_version=model_version,
+        location=location,
+        format=format,
+    )
+    return op()
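A hypothetical round trip with the new reader. Project and model names are placeholders, and whether the returned object exposes predict() depends on which @register_odps_model class claims the model — shown here as an assumption:

from odps import ODPS
from maxframe.learn.utils import read_odps_model

o = ODPS.from_environments()  # get_model() requires pyodps>=0.11.5
model = read_odps_model("my_xgb_model", project="my_project", odps_entry=o)
# for a BOOSTED_TREE_* model this dispatches to a registered class such as
# the XGBRegressor decorated earlier in this diff
pred = model.predict(test_df).execute()  # test_df: placeholder feature frame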
maxframe/lib/filesystem/_oss_lib/common.py CHANGED
@@ -40,6 +40,8 @@ class OSSFileEntry:
         self._storage_options = storage_options
 
     def is_dir(self):
+        if self._path.endswith("/"):
+            self._is_dir = True
         if self._is_dir is None:
             self._is_dir = oss_isdir(self._path)
         return self._is_dir
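The added short-circuit treats any path spelled with a trailing slash as a directory, so the remote oss_isdir() lookup is skipped. A minimal standalone sketch of the same logic (not the real class):

from typing import Callable, Optional

def is_dir(path: str, cached: Optional[bool], remote_isdir: Callable[[str], bool]) -> bool:
    if path.endswith("/"):
        return True  # trailing slash implies a directory: no OSS round trip
    if cached is None:
        cached = remote_isdir(path)  # fall back to the remote check once
    return cached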
maxframe/lib/mmh3.cp38-win_amd64.pyd CHANGED (binary file, no text diff)
maxframe/opcodes.py CHANGED
@@ -271,9 +271,9 @@ SEM = 352
 STR_CONCAT = 353
 MAD = 354
 MEDIAN = 355
-RANK = 356
 IDXMAX = 357
 IDXMIN = 358
+MODE = 359
 
 # tensor operator
 RESHAPE = 401
@@ -398,6 +398,9 @@ REORDER_LEVELS = 747
 DATAFRAME_COMPARE = 748
 DROPLEVEL = 749
 DATAFRAME_UPDATE = 750
+DATAFRAME_COMBINE = 751
+DATAFRAME_INFER_DTYPES = 752
+BETWEEN_TIME = 753
 
 FUSE = 801
 
@@ -409,6 +412,9 @@ MANAGED_MULTI_MODAL_GENERATION = 813
 LLM_TEXT_SUMMARIZE_TASK = 814
 LLM_TEXT_TRANSLATE_TASK = 815
 LLM_TEXT_CLASSIFY_TASK = 816
+LLM_TEXT_EXTRACT_TASK = 817
+LLM_TEXT_EMBEDDING_TASK = 818
+OPENAI_COMPATIBLE_TEXT_GENERATION = 819
 
 # table like input for tensor
 TABLE_COO = 1003
@@ -456,6 +462,7 @@ PSRS_SORT_REGULAR_SAMPLE = 2040
 PSRS_CONCAT_PIVOT = 2041
 PSRS_SHUFFLE = 2042
 PSRS_ALIGN = 2043
+PSRS_RANK_SHUFFLE = 2044
 # partition
 CALC_PARTITIONS_INFO = 2046
 PARTITION_MERGED = 2047
@@ -463,6 +470,7 @@ PARTITION_MERGED = 2047
 # dataframe sort
 SORT_VALUES = 2050
 SORT_INDEX = 2051
+RANK = 2052
 
 # window
 ROLLING_AGG = 2060
maxframe/remote/core.py CHANGED
@@ -27,6 +27,7 @@ from ..serialization.serializables import (
     ListField,
 )
 from ..tensor.core import TENSOR_TYPE
+from ..typing_ import TileableType
 from ..udf import BuiltinFunction
 from ..utils import find_objects, replace_objects
 
@@ -59,6 +60,9 @@ class RemoteFunction(ObjectOperatorMixin, ObjectOperator):
     def has_custom_code(self) -> bool:
         return not isinstance(self.function, BuiltinFunction)
 
+    def check_inputs(self, inputs: List[TileableType]):
+        return
+
     @classmethod
     def _set_inputs(cls, op: "RemoteFunction", inputs: List[EntityData]):
         raw_inputs = getattr(op, "_inputs", None)
maxframe/serialization/tests/test_serial.py CHANGED
@@ -239,11 +239,11 @@ def test_pandas():
 @pytest.mark.skipif(_arrow_dtype_supported, reason="pandas doesn't support ArrowDtype")
 def test_fake_arrow_dtype_serde():
     serializer = DtypeSerializer()
-    payload, data, ok = serializer.serial(
+    payload, data, is_leaf = serializer.serial(
         FakeArrowDtype(pa.map_(pa.int64(), pa.string())), dict()
     )
 
-    assert ok
+    assert is_leaf
     assert data == []
     assert payload == ["PA", "map<int64, string>"]
     new_dtype = serializer.deserial(payload, dict(), list())
maxframe/tensor/arithmetic/__init__.py CHANGED
@@ -154,7 +154,7 @@ def _install():
     def inner(lhs, rhs, **kwargs):
         ret = func(lhs, rhs, **kwargs)
         if isinstance(ret, TENSOR_TYPE):
-            ret.op.magic = True
+            ret.op.extra_params["magic"] = True
         return ret
 
     return inner
maxframe/tensor/arithmetic/core.py CHANGED
@@ -415,8 +415,8 @@ class TensorOutBinOp(TensorOperator, TensorElementWiseWithInputs):
         dtype = [r.dtype for r in self._fun(np.empty(1, dtype=x.dtype))]
 
         out = out or (None, None)
-        out1 = out1 or out[0]
-        out2 = out2 or out[1]
+        out1 = out1 if out1 is not None else out[0]
+        out2 = out2 if out2 is not None else out[1]
         x, out1, out2, where = self._process_inputs(x, out1, out2, where)
         shape = x.shape
         order1 = self._calc_order(x, out1)
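`out1 or out[0]` truth-tests the provided output tensor, which (with the `Tensor.__bool__` added below) could trigger execution or raise; the identity check keeps the fallback purely structural. The pattern in isolation:

def pick(preferred, fallback):
    # `preferred or fallback` would call __bool__ on `preferred`;
    # the None check below never evaluates tensor truthiness
    return preferred if preferred is not None else fallback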
maxframe/tensor/arithmetic/tests/test_arithmetic.py CHANGED
@@ -16,7 +16,6 @@ import numpy as np
 import pytest
 import scipy.sparse as sps
 
-from ....core import enter_mode
 from ....utils import collect_leaf_operators
 from ...core import SparseTensor, Tensor
 from ...datasource import array, empty, ones, tensor
@@ -391,14 +390,6 @@ def test_get_set_real():
         a.real = [2, 4]
 
 
-def test_build_mode():
-    t1 = ones((2, 3), chunk_size=2)
-    assert t1 == 2
-
-    with enter_mode(build=True):
-        assert t1 != 2
-
-
 def test_unary_op_func_name():
     # make sure all the unary op has defined the func name.
 
maxframe/tensor/core.py CHANGED
@@ -251,6 +251,9 @@ class Tensor(HasShapeTileable):
     def __len__(self):
         return len(self._data)
 
+    def __bool__(self):
+        return True if is_build_mode() else bool(self.to_numpy())
+
     @property
     def shape(self):
         return self._data.shape
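With `__bool__` defined, truth-testing a tensor materializes it via `to_numpy()` outside of build mode; inside build mode (used during graph construction) it returns True without executing anything, which is also why the old test_build_mode case above became obsolete. A hypothetical sketch, assuming a configured execution session:

import maxframe.tensor as mt

t = mt.tensor([1.0])
if t > 0:  # the comparison builds a size-1 tensor; __bool__ executes it
    print("positive")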
maxframe/tensor/misc/copyto.py CHANGED
@@ -83,7 +83,7 @@ class TensorCopyTo(TensorOperator, TensorOperatorMixin):
                 "could not broadcast input array "
                 f"from shape {src.shape!r} into shape {dst.shape!r}"
             )
-        if where:
+        if where is not None:
             try:
                 broadcast_to(where, dst.shape)
             except ValueError:
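This ties in with the `Tensor.__bool__` addition above: `where` is usually a tensor mask, so `if where:` would try to evaluate the mask (or raise for multi-element masks), while `is not None` only checks that a mask was passed at all. A hedged usage sketch, assuming mt.copyto is exported as in Mars:

import maxframe.tensor as mt

src = mt.ones((2, 2))
dst = mt.empty((2, 2))
mask = mt.tensor([[True, False], [False, True]])
mt.copyto(dst, src, where=mask)  # mask is only shape-checked via broadcast_to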