maxframe 0.1.0b4__cp311-cp311-win32.whl → 1.0.0__cp311-cp311-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (214) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp311-win32.pyd +0 -0
  3. maxframe/codegen.py +56 -5
  4. maxframe/config/config.py +78 -10
  5. maxframe/config/validators.py +42 -11
  6. maxframe/conftest.py +58 -14
  7. maxframe/core/__init__.py +2 -16
  8. maxframe/core/entity/__init__.py +1 -12
  9. maxframe/core/entity/executable.py +1 -1
  10. maxframe/core/entity/objects.py +46 -45
  11. maxframe/core/entity/output_types.py +0 -3
  12. maxframe/core/entity/tests/test_objects.py +43 -0
  13. maxframe/core/entity/tileables.py +5 -78
  14. maxframe/core/graph/__init__.py +2 -2
  15. maxframe/core/graph/builder/__init__.py +0 -1
  16. maxframe/core/graph/builder/base.py +5 -4
  17. maxframe/core/graph/builder/tileable.py +4 -4
  18. maxframe/core/graph/builder/utils.py +4 -8
  19. maxframe/core/graph/core.cp311-win32.pyd +0 -0
  20. maxframe/core/graph/core.pyx +4 -4
  21. maxframe/core/graph/entity.py +9 -33
  22. maxframe/core/operator/__init__.py +2 -9
  23. maxframe/core/operator/base.py +3 -5
  24. maxframe/core/operator/objects.py +0 -9
  25. maxframe/core/operator/utils.py +55 -0
  26. maxframe/dataframe/__init__.py +2 -1
  27. maxframe/dataframe/arithmetic/around.py +5 -17
  28. maxframe/dataframe/arithmetic/core.py +15 -7
  29. maxframe/dataframe/arithmetic/docstring.py +7 -33
  30. maxframe/dataframe/arithmetic/equal.py +4 -2
  31. maxframe/dataframe/arithmetic/greater.py +4 -2
  32. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  33. maxframe/dataframe/arithmetic/less.py +2 -2
  34. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  35. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  36. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
  37. maxframe/dataframe/core.py +58 -12
  38. maxframe/dataframe/datasource/date_range.py +2 -2
  39. maxframe/dataframe/datasource/read_odps_query.py +120 -24
  40. maxframe/dataframe/datasource/read_odps_table.py +9 -4
  41. maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
  42. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  43. maxframe/dataframe/datastore/to_odps.py +28 -0
  44. maxframe/dataframe/extensions/__init__.py +5 -0
  45. maxframe/dataframe/extensions/flatjson.py +131 -0
  46. maxframe/dataframe/extensions/flatmap.py +317 -0
  47. maxframe/dataframe/extensions/reshuffle.py +1 -1
  48. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  49. maxframe/dataframe/groupby/core.py +1 -1
  50. maxframe/dataframe/groupby/cum.py +0 -1
  51. maxframe/dataframe/groupby/fill.py +4 -1
  52. maxframe/dataframe/groupby/getitem.py +6 -0
  53. maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
  54. maxframe/dataframe/groupby/transform.py +5 -1
  55. maxframe/dataframe/indexing/align.py +1 -1
  56. maxframe/dataframe/indexing/loc.py +6 -4
  57. maxframe/dataframe/indexing/rename.py +5 -28
  58. maxframe/dataframe/indexing/sample.py +0 -1
  59. maxframe/dataframe/indexing/set_index.py +68 -1
  60. maxframe/dataframe/initializer.py +11 -1
  61. maxframe/dataframe/merge/__init__.py +9 -1
  62. maxframe/dataframe/merge/concat.py +41 -31
  63. maxframe/dataframe/merge/merge.py +237 -3
  64. maxframe/dataframe/merge/tests/test_merge.py +126 -1
  65. maxframe/dataframe/misc/__init__.py +4 -0
  66. maxframe/dataframe/misc/apply.py +6 -11
  67. maxframe/dataframe/misc/case_when.py +141 -0
  68. maxframe/dataframe/misc/describe.py +2 -2
  69. maxframe/dataframe/misc/drop_duplicates.py +8 -8
  70. maxframe/dataframe/misc/eval.py +4 -0
  71. maxframe/dataframe/misc/memory_usage.py +2 -2
  72. maxframe/dataframe/misc/pct_change.py +1 -83
  73. maxframe/dataframe/misc/pivot_table.py +262 -0
  74. maxframe/dataframe/misc/tests/test_misc.py +93 -1
  75. maxframe/dataframe/misc/transform.py +1 -30
  76. maxframe/dataframe/misc/value_counts.py +4 -17
  77. maxframe/dataframe/missing/dropna.py +1 -1
  78. maxframe/dataframe/missing/fillna.py +5 -5
  79. maxframe/dataframe/operators.py +1 -17
  80. maxframe/dataframe/plotting/core.py +2 -2
  81. maxframe/dataframe/reduction/core.py +4 -3
  82. maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
  83. maxframe/dataframe/sort/sort_values.py +1 -11
  84. maxframe/dataframe/statistics/corr.py +3 -3
  85. maxframe/dataframe/statistics/quantile.py +13 -19
  86. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  87. maxframe/dataframe/tests/test_initializer.py +33 -2
  88. maxframe/dataframe/utils.py +33 -11
  89. maxframe/dataframe/window/expanding.py +5 -3
  90. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  91. maxframe/errors.py +13 -0
  92. maxframe/extension.py +12 -0
  93. maxframe/io/__init__.py +13 -0
  94. maxframe/io/objects/__init__.py +24 -0
  95. maxframe/io/objects/core.py +140 -0
  96. maxframe/io/objects/tensor.py +76 -0
  97. maxframe/io/objects/tests/__init__.py +13 -0
  98. maxframe/io/objects/tests/test_object_io.py +97 -0
  99. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  100. maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
  101. maxframe/{odpsio → io/odpsio}/schema.py +38 -16
  102. maxframe/io/odpsio/tableio.py +719 -0
  103. maxframe/io/odpsio/tests/__init__.py +13 -0
  104. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
  105. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  106. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  107. maxframe/io/odpsio/volumeio.py +63 -0
  108. maxframe/learn/contrib/__init__.py +3 -1
  109. maxframe/learn/contrib/graph/__init__.py +15 -0
  110. maxframe/learn/contrib/graph/connected_components.py +215 -0
  111. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  112. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  113. maxframe/learn/contrib/llm/__init__.py +16 -0
  114. maxframe/learn/contrib/llm/core.py +54 -0
  115. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  116. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  117. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  118. maxframe/learn/contrib/llm/text.py +42 -0
  119. maxframe/learn/contrib/utils.py +52 -0
  120. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  121. maxframe/learn/contrib/xgboost/classifier.py +110 -0
  122. maxframe/learn/contrib/xgboost/core.py +241 -0
  123. maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
  124. maxframe/learn/contrib/xgboost/predict.py +121 -0
  125. maxframe/learn/contrib/xgboost/regressor.py +71 -0
  126. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  127. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  128. maxframe/learn/contrib/xgboost/train.py +132 -0
  129. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  130. maxframe/learn/utils/__init__.py +15 -0
  131. maxframe/learn/utils/core.py +29 -0
  132. maxframe/lib/mmh3.cp311-win32.pyd +0 -0
  133. maxframe/lib/mmh3.pyi +43 -0
  134. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  135. maxframe/lib/wrapped_pickle.py +2 -1
  136. maxframe/opcodes.py +11 -0
  137. maxframe/protocol.py +154 -27
  138. maxframe/remote/core.py +4 -8
  139. maxframe/serialization/__init__.py +1 -0
  140. maxframe/serialization/core.cp311-win32.pyd +0 -0
  141. maxframe/serialization/core.pxd +3 -0
  142. maxframe/serialization/core.pyi +64 -0
  143. maxframe/serialization/core.pyx +67 -26
  144. maxframe/serialization/exception.py +1 -1
  145. maxframe/serialization/pandas.py +52 -17
  146. maxframe/serialization/serializables/core.py +180 -15
  147. maxframe/serialization/serializables/field_type.py +4 -1
  148. maxframe/serialization/serializables/tests/test_serializable.py +54 -5
  149. maxframe/serialization/tests/test_serial.py +2 -1
  150. maxframe/session.py +37 -2
  151. maxframe/tensor/__init__.py +81 -2
  152. maxframe/tensor/arithmetic/isclose.py +1 -0
  153. maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
  154. maxframe/tensor/core.py +5 -136
  155. maxframe/tensor/datasource/array.py +7 -2
  156. maxframe/tensor/datasource/full.py +1 -1
  157. maxframe/tensor/datasource/scalar.py +1 -1
  158. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  159. maxframe/tensor/indexing/flatnonzero.py +1 -1
  160. maxframe/tensor/indexing/getitem.py +2 -0
  161. maxframe/tensor/merge/__init__.py +2 -0
  162. maxframe/tensor/merge/concatenate.py +101 -0
  163. maxframe/tensor/merge/tests/test_merge.py +30 -1
  164. maxframe/tensor/merge/vstack.py +74 -0
  165. maxframe/tensor/{base → misc}/__init__.py +4 -0
  166. maxframe/tensor/misc/atleast_1d.py +72 -0
  167. maxframe/tensor/misc/atleast_2d.py +70 -0
  168. maxframe/tensor/misc/atleast_3d.py +85 -0
  169. maxframe/tensor/misc/tests/__init__.py +13 -0
  170. maxframe/tensor/{base → misc}/transpose.py +22 -18
  171. maxframe/tensor/misc/unique.py +205 -0
  172. maxframe/tensor/operators.py +1 -7
  173. maxframe/tensor/random/core.py +1 -1
  174. maxframe/tensor/reduction/count_nonzero.py +2 -1
  175. maxframe/tensor/reduction/mean.py +1 -0
  176. maxframe/tensor/reduction/nanmean.py +1 -0
  177. maxframe/tensor/reduction/nanvar.py +2 -0
  178. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  179. maxframe/tensor/reduction/var.py +2 -0
  180. maxframe/tensor/statistics/quantile.py +2 -2
  181. maxframe/tensor/utils.py +2 -22
  182. maxframe/tests/test_protocol.py +34 -0
  183. maxframe/tests/test_utils.py +0 -12
  184. maxframe/tests/utils.py +17 -2
  185. maxframe/typing_.py +4 -1
  186. maxframe/udf.py +62 -3
  187. maxframe/utils.py +112 -86
  188. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
  189. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
  190. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
  191. maxframe_client/__init__.py +0 -1
  192. maxframe_client/clients/framedriver.py +4 -1
  193. maxframe_client/fetcher.py +123 -54
  194. maxframe_client/session/consts.py +3 -0
  195. maxframe_client/session/graph.py +8 -2
  196. maxframe_client/session/odps.py +223 -40
  197. maxframe_client/session/task.py +108 -80
  198. maxframe_client/tests/test_fetcher.py +21 -3
  199. maxframe_client/tests/test_session.py +136 -8
  200. maxframe/core/entity/chunks.py +0 -68
  201. maxframe/core/entity/fuse.py +0 -73
  202. maxframe/core/graph/builder/chunk.py +0 -430
  203. maxframe/odpsio/tableio.py +0 -300
  204. maxframe/odpsio/volumeio.py +0 -95
  205. maxframe_client/clients/spe.py +0 -104
  206. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  207. /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
  208. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  209. /maxframe/tensor/{base → misc}/astype.py +0 -0
  210. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  211. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  212. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  213. /maxframe/tensor/{base → misc}/where.py +0 -0
  214. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
@@ -114,7 +114,6 @@ from .arithmetic import (
114
114
  )
115
115
  from .arithmetic import truediv as true_divide
116
116
  from .arithmetic import trunc
117
- from .base import broadcast_to, transpose, where
118
117
  from .core import Tensor
119
118
  from .datasource import (
120
119
  arange,
@@ -143,7 +142,16 @@ from .indexing import (
143
142
  take,
144
143
  unravel_index,
145
144
  )
146
- from .merge import stack
145
+ from .merge import concatenate, stack, vstack
146
+ from .misc import (
147
+ atleast_1d,
148
+ atleast_2d,
149
+ atleast_3d,
150
+ broadcast_to,
151
+ transpose,
152
+ unique,
153
+ where,
154
+ )
147
155
  from .rechunk import rechunk
148
156
  from .reduction import (
149
157
  all,
@@ -180,4 +188,75 @@ from .reduction import std, sum, var
180
188
  from .reshape import reshape
181
189
  from .ufunc import ufunc
182
190
 
191
+ # isort: off
192
+ # noinspection PyUnresolvedReferences
193
+ from numpy import (
194
+ e,
195
+ errstate,
196
+ geterr,
197
+ inf,
198
+ nan,
199
+ newaxis,
200
+ pi,
201
+ seterr,
202
+ )
203
+
204
+ try:
205
+ from numpy.exceptions import AxisError
206
+ except ImportError:
207
+ from numpy import AxisError
208
+
209
+ NAN = nan
210
+ NINF = -inf
211
+ Inf = inf
212
+ NaN = nan
213
+
214
+ # import numpy types
215
+ # noinspection PyUnresolvedReferences
216
+ from numpy import (
217
+ bool_ as bool,
218
+ bytes_,
219
+ character,
220
+ complex64,
221
+ complex128,
222
+ complexfloating,
223
+ datetime64,
224
+ double,
225
+ dtype,
226
+ flexible,
227
+ float16,
228
+ float32,
229
+ float64,
230
+ floating,
231
+ generic,
232
+ inexact,
233
+ int8,
234
+ int16,
235
+ int32,
236
+ int64,
237
+ intc,
238
+ intp,
239
+ number,
240
+ integer,
241
+ object_ as object,
242
+ signedinteger,
243
+ timedelta64,
244
+ uint,
245
+ uint8,
246
+ uint16,
247
+ uint32,
248
+ uint64,
249
+ unsignedinteger,
250
+ void,
251
+ )
252
+
253
+ try:
254
+ from numpy import cfloat
255
+ except ImportError:
256
+ from numpy import cdouble as cfloat
257
+ try:
258
+ from numpy import str_ as unicode_
259
+ except ImportError:
260
+ from numpy import unicode_
261
+
183
262
  del fetch, ufunc
@@ -23,6 +23,7 @@ from .core import TensorBinOp
23
23
 
24
24
  class TensorIsclose(TensorBinOp):
25
25
  _op_type_ = opcodes.ISCLOSE
26
+ _func_name = "isclose"
26
27
 
27
28
  rtol = Float64Field("rtol", default=None)
28
29
  atol = Float64Field("atol", default=None)
@@ -17,26 +17,13 @@
17
17
  import numpy as np
18
18
  import pytest
19
19
 
20
+ from maxframe.tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
21
+ from maxframe.utils import collect_leaf_operators
22
+
20
23
  from ....core import enter_mode
21
24
  from ...core import SparseTensor, Tensor
22
25
  from ...datasource import array, empty, ones, tensor
23
- from .. import (
24
- TensorAdd,
25
- TensorGreaterThan,
26
- TensorIsclose,
27
- TensorLog,
28
- TensorSubtract,
29
- add,
30
- around,
31
- cos,
32
- frexp,
33
- isclose,
34
- isfinite,
35
- log,
36
- negative,
37
- subtract,
38
- truediv,
39
- )
26
+ from .. import * # noqa: F401
40
27
 
41
28
 
42
29
  def test_add():
@@ -252,7 +239,7 @@ def test_compare():
252
239
 
253
240
  def test_frexp():
254
241
  t1 = ones((3, 4, 5), chunk_size=2)
255
- t2 = empty((3, 4, 5), dtype=np.float_, chunk_size=2)
242
+ t2 = empty((3, 4, 5), dtype=np.dtype(float), chunk_size=2)
256
243
  op_type = type(t1.op)
257
244
 
258
245
  o1, o2 = frexp(t1)
@@ -412,3 +399,20 @@ def test_build_mode():
412
399
 
413
400
  with enter_mode(build=True):
414
401
  assert t1 != 2
402
+
403
+
404
+ def test_unary_op_func_name():
405
+ # make sure every unary op has defined the func name.
406
+
407
+ results = collect_leaf_operators(TensorUnaryOp)
408
+ for op_type in results:
409
+ assert hasattr(op_type, "_func_name")
410
+
411
+
412
+ def test_binary_op_func_name():
413
+ # make sure every binary op has defined the func name.
414
+
415
+ results = collect_leaf_operators(TensorBinOp)
416
+ for op_type in results:
417
+ if op_type not in (TensorSetImag, TensorSetReal):
418
+ assert hasattr(op_type, "_func_name")
maxframe/tensor/core.py CHANGED
@@ -23,8 +23,6 @@ from typing import Any, Dict
23
23
  import numpy as np
24
24
 
25
25
  from ..core import (
26
- Chunk,
27
- ChunkData,
28
26
  HasShapeTileable,
29
27
  HasShapeTileableData,
30
28
  OutputType,
@@ -36,14 +34,9 @@ from ..core.entity.utils import refresh_tileable_shape
36
34
  from ..serialization.serializables import (
37
35
  AnyField,
38
36
  DataTypeField,
39
- EnumField,
40
- FieldTypes,
41
- ListField,
42
37
  Serializable,
43
38
  StringField,
44
- TupleField,
45
39
  )
46
- from ..utils import on_deserialize_shape, on_serialize_shape, skip_na_call
47
40
  from .utils import fetch_corner_data, get_chunk_slices
48
41
 
49
42
  logger = logging.getLogger(__name__)
@@ -56,134 +49,18 @@ class TensorOrder(Enum):
56
49
  F_ORDER = "F"
57
50
 
58
51
 
59
- class TensorChunkData(ChunkData):
60
- __slots__ = ()
61
- _no_copy_attrs_ = ChunkData._no_copy_attrs_ | {"dtype"}
62
- type_name = "Tensor"
63
-
64
- # required fields
65
- _shape = TupleField(
66
- "shape",
67
- FieldTypes.int64,
68
- on_serialize=on_serialize_shape,
69
- on_deserialize=on_deserialize_shape,
70
- )
71
- _order = EnumField("order", TensorOrder, FieldTypes.string)
72
- # optional fields
73
- _dtype = DataTypeField("dtype")
74
-
75
- def __init__(self, op=None, index=None, shape=None, dtype=None, order=None, **kw):
76
- if isinstance(order, str):
77
- order = getattr(TensorOrder, order)
78
- super().__init__(
79
- _op=op, _index=index, _shape=shape, _dtype=dtype, _order=order, **kw
80
- )
81
- if self.order is None and self.op is not None:
82
- if len(self.inputs) == 0:
83
- self._order = TensorOrder.C_ORDER
84
- elif all(
85
- hasattr(inp, "order") and inp.order == TensorOrder.F_ORDER
86
- for inp in self.inputs
87
- ):
88
- self._order = TensorOrder.F_ORDER
89
- else:
90
- self._order = TensorOrder.C_ORDER
91
-
92
- @property
93
- def params(self) -> Dict[str, Any]:
94
- # params return the properties which useful to rebuild a new chunk
95
- return {
96
- "shape": self.shape,
97
- "dtype": self.dtype,
98
- "order": self.order,
99
- "index": self.index,
100
- }
101
-
102
- @params.setter
103
- def params(self, new_params: Dict[str, Any]):
104
- params = new_params.copy()
105
- params.pop("index", None) # index not needed to update
106
- new_shape = params.pop("shape", None)
107
- if new_shape is not None:
108
- self._shape = new_shape
109
- dtype = params.pop("dtype", None)
110
- if dtype is not None:
111
- self._dtype = dtype
112
- order = params.pop("order", None)
113
- if order is not None:
114
- self._order = order
115
- if params: # pragma: no cover
116
- raise TypeError(f"Unknown params: {list(params)}")
117
-
118
- @classmethod
119
- def get_params_from_data(cls, data: np.ndarray) -> Dict[str, Any]:
120
- from .array_utils import is_cupy
121
-
122
- if not is_cupy(data):
123
- data = np.asarray(data)
124
- order = (
125
- TensorOrder.C_ORDER if data.flags["C_CONTIGUOUS"] else TensorOrder.F_ORDER
126
- )
127
- return {"shape": data.shape, "dtype": data.dtype, "order": order}
128
-
129
- def __len__(self):
130
- try:
131
- return self.shape[0]
132
- except IndexError:
133
- if is_build_mode():
134
- return 0
135
- raise TypeError("len() of unsized object")
136
-
137
- @property
138
- def shape(self):
139
- return getattr(self, "_shape", None)
140
-
141
- @property
142
- def ndim(self):
143
- return len(self.shape)
144
-
145
- @property
146
- def size(self):
147
- return np.prod(self.shape).item()
148
-
149
- @property
150
- def dtype(self):
151
- return getattr(self, "_dtype", None) or self.op.dtype
152
-
153
- @property
154
- def order(self):
155
- return getattr(self, "_order", None)
156
-
157
- @property
158
- def nbytes(self):
159
- return np.prod(self.shape) * self.dtype.itemsize
160
-
161
-
162
- class TensorChunk(Chunk):
163
- __slots__ = ()
164
- _allow_data_type_ = (TensorChunkData,)
165
- type_name = "Tensor"
166
-
167
- def __len__(self):
168
- return len(self._data)
169
-
170
-
171
52
  class TensorData(HasShapeTileableData, _ExecuteAndFetchMixin):
172
53
  __slots__ = ()
173
54
  type_name = "Tensor"
174
55
 
56
+ _legacy_deprecated_non_primitives = ["_chunks"]
57
+
175
58
  # required fields
176
59
  _order = StringField(
177
60
  "order", on_serialize=attrgetter("value"), on_deserialize=TensorOrder
178
61
  )
179
62
  # optional fields
180
63
  _dtype = DataTypeField("dtype")
181
- _chunks = ListField(
182
- "chunks",
183
- FieldTypes.reference(TensorChunkData),
184
- on_serialize=skip_na_call(lambda x: [it.data for it in x]),
185
- on_deserialize=skip_na_call(lambda x: [TensorChunk(it) for it in x]),
186
- )
187
64
 
188
65
  def __init__(
189
66
  self,
@@ -318,7 +195,7 @@ class TensorData(HasShapeTileableData, _ExecuteAndFetchMixin):
318
195
  return fromsparse(self, fill_value=fill_value)
319
196
 
320
197
  def transpose(self, *axes):
321
- from .base import transpose
198
+ from .misc import transpose
322
199
 
323
200
  if len(axes) == 1 and isinstance(axes[0], Iterable):
324
201
  axes = axes[0]
@@ -346,11 +223,6 @@ class TensorData(HasShapeTileableData, _ExecuteAndFetchMixin):
346
223
 
347
224
  return reshape(self, shape, order=order)
348
225
 
349
- def totiledb(self, uri, ctx=None, key=None, timestamp=None):
350
- from .datastore import totiledb
351
-
352
- return totiledb(uri, self, ctx=ctx, key=key, timestamp=timestamp)
353
-
354
226
  @staticmethod
355
227
  def from_dataframe(in_df):
356
228
  from .datasource import from_dataframe
@@ -526,9 +398,6 @@ class Tensor(HasShapeTileable):
526
398
  """
527
399
  return self._data.T
528
400
 
529
- def totiledb(self, uri, ctx=None, key=None, timestamp=None):
530
- return self._data.totiledb(uri, ctx=ctx, key=key, timestamp=timestamp)
531
-
532
401
  def copy(self, order="C"):
533
402
  return super().copy().astype(self.dtype, order=order, copy=False)
534
403
 
@@ -589,7 +458,7 @@ class Tensor(HasShapeTileable):
589
458
  array([('c', 1), ('a', 2)],
590
459
  dtype=[('x', '|S1'), ('y', '<i4')])
591
460
  """
592
- from .base import sort
461
+ from .misc import sort
593
462
 
594
463
  self._data = sort(
595
464
  self,
@@ -651,7 +520,7 @@ class Tensor(HasShapeTileable):
651
520
  >>> a.execute()
652
521
  array([1, 2, 3, 4])
653
522
  """
654
- from .base import partition
523
+ from .misc import partition
655
524
 
656
525
  self._data = partition(self, kth, axis=axis, kind=kind, order=order, **kw).data
657
526
 
@@ -20,6 +20,7 @@ from ...serialization.serializables import (
20
20
  AnyField,
21
21
  FieldTypes,
22
22
  NDArrayField,
23
+ StringField,
23
24
  TupleField,
24
25
  )
25
26
  from ...utils import on_deserialize_shape, on_serialize_shape
@@ -37,8 +38,9 @@ class ArrayDataSource(TensorNoInput):
37
38
 
38
39
  _op_type_ = opcodes.TENSOR_DATA_SOURCE
39
40
 
40
- data = NDArrayField("data")
41
- chunk_size = AnyField("chunk_size")
41
+ data = NDArrayField("data", default=None)
42
+ chunk_size = AnyField("chunk_size", default=None)
43
+ order = StringField("order", default=None)
42
44
 
43
45
  def __init__(self, data=None, dtype=None, gpu=None, **kw):
44
46
  if dtype is not None:
@@ -51,6 +53,9 @@ class ArrayDataSource(TensorNoInput):
51
53
 
52
54
  super().__init__(data=data, dtype=dtype, gpu=gpu, **kw)
53
55
 
56
+ def get_data(self):
57
+ return self.data
58
+
54
59
 
55
60
  class CSRMatrixDataSource(TensorNoInput):
56
61
  """
@@ -89,7 +89,7 @@ def full(shape, fill_value, dtype=None, chunk_size=None, gpu=None, order="C"):
89
89
  """
90
90
  v = np.asarray(fill_value)
91
91
  if len(v.shape) > 0:
92
- from ..base import broadcast_to
92
+ from ..misc import broadcast_to
93
93
 
94
94
  return broadcast_to(
95
95
  tensor(v, dtype=dtype, chunk_size=chunk_size, gpu=gpu, order=order), shape
@@ -33,7 +33,7 @@ class Scalar(TensorNoInput):
33
33
  def scalar(data, dtype=None, gpu=None):
34
34
  try:
35
35
  arr = np.array(data, dtype=dtype)
36
- op = Scalar(arr, dtype=arr.dtype, gpu=gpu)
36
+ op = Scalar(data=arr, dtype=arr.dtype, gpu=gpu)
37
37
  shape = ()
38
38
  return op(shape)
39
39
  except ValueError:
@@ -141,7 +141,7 @@ def test_zeros():
141
141
 
142
142
 
143
143
  def test_data_source():
144
- from ...base.broadcast_to import TensorBroadcastTo
144
+ from ...misc.broadcast_to import TensorBroadcastTo
145
145
 
146
146
  data = np.random.random((10, 3))
147
147
  t = tensor(data, chunk_size=2)
@@ -55,6 +55,6 @@ def flatnonzero(a):
55
55
  >>> x.ravel()[mt.flatnonzero(x)].execute() # TODO(jisheng): accomplish this after fancy indexing is supported
56
56
 
57
57
  """
58
- from ..base import ravel
58
+ from ..misc import ravel
59
59
 
60
60
  return nonzero(ravel(a))[0]
@@ -130,6 +130,8 @@ def _calc_order(a, index):
130
130
  continue
131
131
  elif isinstance(ind, slice):
132
132
  shape = a.shape[in_axis]
133
+ if shape is np.nan:
134
+ return TensorOrder.C_ORDER
133
135
  slc = ind.indices(shape)
134
136
  if slc[0] == 0 and slc[1] == shape and slc[2] == 1:
135
137
  continue
@@ -12,4 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from .concatenate import concatenate
15
16
  from .stack import stack
17
+ from .vstack import vstack
@@ -0,0 +1,101 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import numpy as np
15
+
16
+ from ... import opcodes
17
+ from ...serialization.serializables import Int32Field
18
+ from ..datasource import tensor as astensor
19
+ from ..operators import TensorOperator, TensorOperatorMixin
20
+ from ..utils import validate_axis
21
+
22
+
23
+ class TensorConcatenate(TensorOperator, TensorOperatorMixin):
24
+ _op_type_ = opcodes.CONCATENATE
25
+
26
+ axis = Int32Field("axis", default=0)
27
+
28
+ def __call__(self, tensors):
29
+ axis = self.axis
30
+ shape = _calc_concatenate_shape(tensors, axis)
31
+ shape[axis] = sum(t.shape[axis] for t in tensors)
32
+ return self.new_tensor(tensors, shape=tuple(shape))
33
+
34
+
35
+ def concatenate(tensors, axis=0):
36
+ """
37
+ Join a sequence of arrays along an existing axis.
38
+
39
+ Parameters
40
+ ----------
41
+ a1, a2, ... : sequence of array_like
42
+ The tensors must have the same shape, except in the dimension
43
+ corresponding to `axis` (the first, by default).
44
+ axis : int, optional
45
+ The axis along which the tensors will be joined. Default is 0.
46
+
47
+ Returns
48
+ -------
49
+ res : Tensor
50
+ The concatenated tensor.
51
+
52
+ See Also
53
+ --------
54
+ stack : Stack a sequence of tensors along a new axis.
55
+ vstack : Stack tensors in sequence vertically (row wise)
56
+
57
+ Examples
58
+ --------
59
+ >>> import maxframe.tensor as mt
60
+
61
+ >>> a = mt.array([[1, 2], [3, 4]])
62
+ >>> b = mt.array([[5, 6]])
63
+ >>> mt.concatenate((a, b), axis=0).execute()
64
+ array([[1, 2],
65
+ [3, 4],
66
+ [5, 6]])
67
+ >>> mt.concatenate((a, b.T), axis=1).execute()
68
+ array([[1, 2, 5],
69
+ [3, 4, 6]])
70
+
71
+ """
72
+ if axis is None:
73
+ axis = 0
74
+ tensors = [astensor(t) for t in tensors]
75
+ axis = validate_axis(tensors[0].ndim, axis)
76
+
77
+ if len(set(t.ndim for t in tensors)) != 1:
78
+ raise ValueError("all the input tensors must have same number of dimensions")
79
+
80
+ shapes = [t.shape[:axis] + t.shape[axis + 1 :] for t in tensors]
81
+ if len(set(shapes)) != 1:
82
+ raise ValueError(
83
+ "all the input tensor dimensions "
84
+ "except for the concatenation axis must match exactly"
85
+ )
86
+ shape = _calc_concatenate_shape(tensors, axis)
87
+ if any(np.isnan(s) for i, s in enumerate(shape) if i != axis):
88
+ raise ValueError("cannot concatenate tensor with unknown shape")
89
+
90
+ return _concatenate(tensors, axis)
91
+
92
+
93
+ def _concatenate(tensors, axis=0):
94
+ dtype = np.result_type(*(t.dtype for t in tensors))
95
+
96
+ op = TensorConcatenate(axis=axis, dtype=dtype)
97
+ return op(tensors)
98
+
99
+
100
+ def _calc_concatenate_shape(tensors, axis):
101
+ return [0 if i == axis else tensors[0].shape[i] for i in range(tensors[0].ndim)]
@@ -18,7 +18,36 @@ import numpy as np
18
18
  import pytest
19
19
 
20
20
  from ...datasource import empty, ones
21
- from .. import stack
21
+ from .. import concatenate, stack
22
+
23
+
24
+ def test_concatenate():
25
+ a = ones((10, 20, 30), chunk_size=10)
26
+ b = ones((20, 20, 30), chunk_size=20)
27
+
28
+ c = concatenate([a, b])
29
+ assert c.shape == (30, 20, 30)
30
+
31
+ a = ones((10, 20, 30), chunk_size=10)
32
+ b = ones((10, 20, 40), chunk_size=20)
33
+
34
+ c = concatenate([a, b], axis=-1)
35
+ assert c.shape == (10, 20, 70)
36
+
37
+ with pytest.raises(ValueError):
38
+ a = ones((10, 20, 30), chunk_size=10)
39
+ b = ones((20, 30, 30), chunk_size=20)
40
+
41
+ concatenate([a, b])
42
+
43
+ with pytest.raises(ValueError):
44
+ a = ones((10, 20, 30), chunk_size=10)
45
+ b = ones((20, 20), chunk_size=20)
46
+
47
+ concatenate([a, b])
48
+
49
+ a = ones((10, 20, 30), chunk_size=5)
50
+ b = ones((20, 20, 30), chunk_size=10)
22
51
 
23
52
 
24
53
  def test_stack():
@@ -0,0 +1,74 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from ..misc import atleast_2d
17
+ from .concatenate import _concatenate, concatenate
18
+
19
+
20
+ def vstack(tup):
21
+ """
22
+ Stack tensors in sequence vertically (row wise).
23
+
24
+ This is equivalent to concatenation along the first axis after 1-D tensors
25
+ of shape `(N,)` have been reshaped to `(1,N)`. Rebuilds tensors divided by
26
+ `vsplit`.
27
+
28
+ This function makes most sense for tensors with up to 3 dimensions. For
29
+ instance, for pixel-data with a height (first axis), width (second axis),
30
+ and r/g/b channels (third axis). The functions `concatenate`, `stack` and
31
+ `block` provide more general stacking and concatenation operations.
32
+
33
+ Parameters
34
+ ----------
35
+ tup : sequence of tensors
36
+ The tensors must have the same shape along all but the first axis.
37
+ 1-D tensors must have the same length.
38
+
39
+ Returns
40
+ -------
41
+ stacked : Tensor
42
+ The tensor formed by stacking the given tensors, will be at least 2-D.
43
+
44
+ See Also
45
+ --------
46
+ stack : Join a sequence of tensors along a new axis.
47
+ concatenate : Join a sequence of tensors along an existing axis.
48
+
49
+ Examples
50
+ --------
51
+ >>> import maxframe.tensor as mt
52
+
53
+ >>> a = mt.array([1, 2, 3])
54
+ >>> b = mt.array([2, 3, 4])
55
+ >>> mt.vstack((a,b)).execute()
56
+ array([[1, 2, 3],
57
+ [2, 3, 4]])
58
+
59
+ >>> a = mt.array([[1], [2], [3]])
60
+ >>> b = mt.array([[2], [3], [4]])
61
+ >>> mt.vstack((a,b)).execute()
62
+ array([[1],
63
+ [2],
64
+ [3],
65
+ [2],
66
+ [3],
67
+ [4]])
68
+
69
+ """
70
+ return concatenate([atleast_2d(t) for t in tup], axis=0)
71
+
72
+
73
+ def _vstack(tup):
74
+ return _concatenate([atleast_2d(t) for t in tup], axis=0)
@@ -13,9 +13,13 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from .astype import TensorAstype
16
+ from .atleast_1d import atleast_1d
17
+ from .atleast_2d import atleast_2d
18
+ from .atleast_3d import atleast_3d
16
19
  from .broadcast_to import TensorBroadcastTo, broadcast_to
17
20
  from .ravel import ravel
18
21
  from .transpose import transpose
22
+ from .unique import unique
19
23
  from .where import TensorWhere, where
20
24
 
21
25