maxframe 0.1.0b4__cp311-cp311-win32.whl → 0.1.0b5__cp311-cp311-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (53):
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp311-win32.pyd +0 -0
  3. maxframe/codegen.py +46 -1
  4. maxframe/config/config.py +11 -1
  5. maxframe/core/graph/core.cp311-win32.pyd +0 -0
  6. maxframe/dataframe/__init__.py +1 -0
  7. maxframe/dataframe/core.py +30 -8
  8. maxframe/dataframe/datasource/read_odps_query.py +3 -1
  9. maxframe/dataframe/datasource/read_odps_table.py +3 -1
  10. maxframe/dataframe/misc/__init__.py +4 -0
  11. maxframe/dataframe/misc/apply.py +1 -1
  12. maxframe/dataframe/misc/case_when.py +141 -0
  13. maxframe/dataframe/misc/pivot_table.py +262 -0
  14. maxframe/dataframe/misc/tests/test_misc.py +61 -0
  15. maxframe/dataframe/plotting/core.py +2 -2
  16. maxframe/dataframe/reduction/core.py +2 -1
  17. maxframe/dataframe/utils.py +7 -0
  18. maxframe/learn/contrib/utils.py +52 -0
  19. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  20. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  21. maxframe/learn/contrib/xgboost/core.py +156 -0
  22. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  23. maxframe/learn/contrib/xgboost/predict.py +138 -0
  24. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  25. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  26. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  27. maxframe/learn/contrib/xgboost/train.py +121 -0
  28. maxframe/learn/utils/__init__.py +15 -0
  29. maxframe/learn/utils/core.py +29 -0
  30. maxframe/lib/mmh3.cp311-win32.pyd +0 -0
  31. maxframe/odpsio/arrow.py +2 -3
  32. maxframe/odpsio/tableio.py +22 -0
  33. maxframe/odpsio/tests/test_schema.py +16 -11
  34. maxframe/opcodes.py +3 -0
  35. maxframe/serialization/core.cp311-win32.pyd +0 -0
  36. maxframe/serialization/core.pyi +61 -0
  37. maxframe/session.py +28 -0
  38. maxframe/tensor/__init__.py +1 -1
  39. maxframe/tensor/base/__init__.py +2 -0
  40. maxframe/tensor/base/atleast_1d.py +74 -0
  41. maxframe/tensor/base/unique.py +205 -0
  42. maxframe/tensor/datasource/array.py +4 -2
  43. maxframe/tensor/datasource/scalar.py +1 -1
  44. maxframe/udf.py +63 -3
  45. maxframe/utils.py +6 -0
  46. {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/METADATA +2 -2
  47. {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/RECORD +53 -36
  48. maxframe_client/fetcher.py +65 -3
  49. maxframe_client/session/odps.py +30 -1
  50. maxframe_client/session/task.py +26 -53
  51. maxframe_client/tests/test_session.py +28 -1
  52. {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/WHEEL +0 -0
  53. {maxframe-0.1.0b4.dist-info → maxframe-0.1.0b5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,15 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .core import convert_to_tensor_or_dataframe
@@ -0,0 +1,29 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pandas as pd
16
+
17
+ from ...dataframe import DataFrame, Series
18
+ from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
19
+ from ...tensor import tensor as astensor
20
+
21
+
22
def convert_to_tensor_or_dataframe(item):
    """
    Wrap ``item`` in the MaxFrame container matching its kind.

    DataFrame-like inputs (``DATAFRAME_TYPE`` or ``pandas.DataFrame``) are
    passed to ``DataFrame``, series-like inputs (``SERIES_TYPE`` or
    ``pandas.Series``) are passed to ``Series``, and every other input is
    handed to ``astensor``.
    """
    if isinstance(item, (DATAFRAME_TYPE, pd.DataFrame)):
        return DataFrame(item)
    if isinstance(item, (SERIES_TYPE, pd.Series)):
        return Series(item)
    return astensor(item)
Binary file
maxframe/odpsio/arrow.py CHANGED
@@ -17,10 +17,9 @@ from typing import Any, Tuple, Union
17
17
  import pandas as pd
18
18
  import pyarrow as pa
19
19
 
20
- import maxframe.tensor as mt
21
-
22
20
  from ..core import OutputType
23
21
  from ..protocol import DataFrameTableMeta
22
+ from ..tensor.core import TENSOR_TYPE
24
23
  from ..typing_ import ArrowTableType, PandasObjectTypes
25
24
  from .schema import build_dataframe_table_meta
26
25
 
@@ -83,7 +82,7 @@ def pandas_to_arrow(
83
82
  df = df.to_frame(name=names[0] if len(names) == 1 else names)
84
83
  elif table_meta.type == OutputType.scalar:
85
84
  names = ["_idx_0"]
86
- if isinstance(df, mt.Tensor):
85
+ if isinstance(df, TENSOR_TYPE):
87
86
  df = pd.DataFrame([], columns=names).astype({names[0]: df.dtype})
88
87
  else:
89
88
  df = pd.DataFrame([[df]], columns=names)
@@ -183,6 +183,28 @@ class HaloTableIO(MCTableIO):
183
183
  for pt in partitions
184
184
  ]
185
185
 
186
def get_table_record_count(
    self, full_table_name: str, partitions: PartitionsType = None
):
    """
    Return the record count reported for a table read session.

    A storage API client is built for the table, a read session is created
    for the requested partitions using the default size-based split options,
    and the ``record_count`` attribute of the response is returned.
    """
    from odps.apis.storage_api import SplitOptions, TableBatchScanRequest

    storage_client = StorageApiArrowClient(
        self._odps,
        self._odps.get_table(full_table_name),
        rest_endpoint=self._storage_api_endpoint,
    )

    # todo add more options for partition column handling
    scan_request = TableBatchScanRequest(
        required_partitions=self._convert_partitions(partitions),
        split_options=SplitOptions.get_default_options(
            SplitOptions.SplitMode.SIZE
        ),
    )
    return storage_client.create_read_session(scan_request).record_count
207
+
186
208
  @contextmanager
187
209
  def open_reader(
188
210
  self,
@@ -30,20 +30,23 @@ from ..schema import (
30
30
  )
31
31
 
32
32
 
33
- def _wrap_maxframe_obj(obj, wrap=True):
34
- if not wrap:
33
+ def _wrap_maxframe_obj(obj, wrap="no"):
34
+ if wrap == "no":
35
35
  return obj
36
36
  if isinstance(obj, pd.DataFrame):
37
- return md.DataFrame(obj)
37
+ obj = md.DataFrame(obj)
38
38
  elif isinstance(obj, pd.Series):
39
- return md.Series(obj)
39
+ obj = md.Series(obj)
40
40
  elif isinstance(obj, pd.Index):
41
- return md.Index(obj)
41
+ obj = md.Index(obj)
42
42
  else:
43
- return mt.scalar(obj)
43
+ obj = mt.scalar(obj)
44
+ if wrap == "data":
45
+ return obj.data
46
+ return obj
44
47
 
45
48
 
46
- @pytest.mark.parametrize("wrap_obj", [False, True])
49
+ @pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
47
50
  def test_pandas_to_odps_schema_dataframe(wrap_obj):
48
51
  data = pd.DataFrame(np.random.rand(100, 5), columns=list("ABCDE"))
49
52
 
@@ -94,7 +97,7 @@ def test_pandas_to_odps_schema_dataframe(wrap_obj):
94
97
  assert meta.pd_index_level_names == [None, None]
95
98
 
96
99
 
97
- @pytest.mark.parametrize("wrap_obj", [False, True])
100
+ @pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
98
101
  def test_pandas_to_odps_schema_series(wrap_obj):
99
102
  data = pd.Series(np.random.rand(100))
100
103
 
@@ -135,7 +138,7 @@ def test_pandas_to_odps_schema_series(wrap_obj):
135
138
  assert meta.pd_index_level_names == ["c1", "c2"]
136
139
 
137
140
 
138
- @pytest.mark.parametrize("wrap_obj", [False, True])
141
+ @pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
139
142
  def test_pandas_to_odps_schema_index(wrap_obj):
140
143
  data = pd.Index(np.random.randint(0, 100, 100))
141
144
 
@@ -167,11 +170,13 @@ def test_pandas_to_odps_schema_index(wrap_obj):
167
170
  assert meta.pd_index_level_names == ["c1", "c2"]
168
171
 
169
172
 
170
- @pytest.mark.parametrize("wrap_obj", [False, True])
173
+ @pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
171
174
  def test_pandas_to_odps_schema_scalar(wrap_obj):
172
175
  data = 1234.56
173
176
 
174
177
  test_scalar = _wrap_maxframe_obj(data, wrap=wrap_obj)
178
+ if wrap_obj != "no":
179
+ test_scalar.op.data = None
175
180
  schema, meta = pandas_to_odps_schema(test_scalar, unknown_as_string=True)
176
181
  assert schema.columns[0].name == "_idx_0"
177
182
  assert schema.columns[0].type.name == "double"
@@ -279,7 +284,7 @@ def test_build_column_name():
279
284
  assert build_table_column_name(4, ("A", 1), records) == "a_1"
280
285
 
281
286
 
282
- @pytest.mark.parametrize("wrap_obj", [False, True])
287
+ @pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
283
288
  def test_build_table_meta(wrap_obj):
284
289
  data = pd.DataFrame(
285
290
  np.random.rand(100, 7),
maxframe/opcodes.py CHANGED
@@ -386,6 +386,9 @@ DATAFRAME_EVAL = 738
386
386
  DUPLICATED = 739
387
387
  DELETE = 740
388
388
  ALIGN = 741
389
+ CASE_WHEN = 742
390
+ PIVOT = 743
391
+ PIVOT_TABLE = 744
389
392
 
390
393
  FUSE = 801
391
394
 
Binary file
@@ -0,0 +1,61 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from concurrent.futures import Executor
16
+ from typing import Any, Callable, Dict, List, TypeVar
17
+
18
# Type stubs for names exported by the compiled ``maxframe.serialization.core``
# extension module. Bodies are intentionally elided (``...``); only the
# signatures below are declared here.

# Decorator-shaped helper: takes a callable and returns a callable.
def buffered(func: Callable) -> Callable: ...
# Maps an arbitrary object to an integer.
# NOTE(review): presumably an identity-style id -- confirm in the extension.
def fast_id(obj: Any) -> int: ...

# Type variable tying the return type of ``load_type`` to ``parent_class``.
LoadType = TypeVar("LoadType")

def load_type(class_name: str, parent_class: LoadType) -> LoadType: ...

# Container constructed from a list of byte buffers.
# NOTE(review): presumably ``get`` unpickles the buffers -- confirm.
class PickleContainer:
    def __init__(self, buffers: List[bytes]): ...
    def get(self) -> Any: ...
    def get_buffers(self) -> List[bytes]: ...

# Base serializer interface: ``serial`` / ``deserial`` pair, an error hook for
# failed deserialization, and class-level (un)registration per object type.
class Serializer:
    serializer_id: int
    def serial(self, obj: Any, context: Dict): ...
    def deserial(self, serialized: List, context: Dict, subs: List[Any]): ...
    def on_deserial_error(
        self,
        serialized: List,
        context: Dict,
        subs_serialized: List,
        error_index: int,
        exc: BaseException,
    ): ...
    @classmethod
    def register(cls, obj_type): ...
    @classmethod
    def unregister(cls, obj_type): ...

# Hashable, comparable object carrying an integer id and a list of callbacks.
# NOTE(review): presumably stands in for not-yet-deserialized objects -- confirm.
class Placeholder:
    id: int
    callbacks: List[Callable]
    def __init__(self, id_: int): ...
    def __hash__(self): ...
    def __eq__(self, other): ...

# Module-level entry points. ``context`` defaults to None in all of them.
def serialize(obj: Any, context: Dict = None): ...
# Coroutine variant of ``serialize`` taking a spawn threshold and an executor.
async def serialize_with_spawn(
    obj: Any,
    context: Dict = None,
    spawn_threshold: int = 100,
    executor: Executor = None,
): ...
def deserialize(headers: List, buffers: List, context: Dict = None): ...
maxframe/session.py CHANGED
@@ -365,6 +365,15 @@ class AbstractAsyncSession(AbstractSession, metaclass=ABCMeta):
365
365
  Stop server.
366
366
  """
367
367
 
368
@abstractmethod
async def get_logview_address(self, hours=None) -> Optional[str]:
    """
    Get Logview address.

    Parameters
    ----------
    hours
        NOTE(review): not documented in the original docstring; presumably
        the validity period of the address in hours -- confirm against the
        concrete implementations.

    Returns
    -------
    Logview address, or None (the return annotation is ``Optional[str]``).
    """
376
+
368
377
  def close(self):
369
378
  asyncio.run(self.destroy())
370
379
 
@@ -549,6 +558,15 @@ class AbstractSyncSession(AbstractSession, metaclass=ABCMeta):
549
558
 
550
559
  return fetch(tileables, self, offsets=offsets, sizes=sizes)
551
560
 
561
@abstractmethod
def get_logview_address(self, hours=None) -> Optional[str]:
    """
    Get Logview address.

    Parameters
    ----------
    hours
        NOTE(review): not documented in the original docstring; presumably
        the validity period of the address in hours -- confirm against the
        concrete implementations.

    Returns
    -------
    Logview address, or None (the return annotation is ``Optional[str]``).
    """
569
+
552
570
 
553
571
  def _delegate_to_isolated_session(func: Union[Callable, Coroutine]):
554
572
  if asyncio.iscoroutinefunction(func):
@@ -728,6 +746,11 @@ class AsyncSession(AbstractAsyncSession):
728
746
  await asyncio.wrap_future(asyncio.run_coroutine_threadsafe(coro, self._loop))
729
747
  stop_isolation()
730
748
 
749
@implements(AbstractAsyncSession.get_logview_address)
@_delegate_to_isolated_session
async def get_logview_address(self, hours=None) -> Optional[str]:
    # Placeholder body. NOTE(review): the decorator name suggests the call is
    # forwarded to the isolated session -- confirm in
    # _delegate_to_isolated_session.
    pass  # pragma: no cover
753
+
731
754
 
732
755
  class ProgressBar:
733
756
  def __init__(self, show_progress):
@@ -949,6 +972,11 @@ class SyncSession(AbstractSyncSession):
949
972
  def get_cluster_versions(self) -> List[str]:
950
973
  pass # pragma: no cover
951
974
 
975
@implements(AbstractSyncSession.get_logview_address)
@_delegate_to_isolated_session
def get_logview_address(self, hours=None) -> Optional[str]:
    # Placeholder body. NOTE(review): the decorator name suggests the call is
    # forwarded to the isolated session -- confirm in
    # _delegate_to_isolated_session.
    pass  # pragma: no cover
979
+
952
980
  def destroy(self):
953
981
  coro = self._isolated_session.destroy()
954
982
  asyncio.run_coroutine_threadsafe(coro, self._loop).result()
@@ -114,7 +114,7 @@ from .arithmetic import (
114
114
  )
115
115
  from .arithmetic import truediv as true_divide
116
116
  from .arithmetic import trunc
117
- from .base import broadcast_to, transpose, where
117
+ from .base import broadcast_to, transpose, unique, where
118
118
  from .core import Tensor
119
119
  from .datasource import (
120
120
  arange,
@@ -13,9 +13,11 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from .astype import TensorAstype
16
+ from .atleast_1d import atleast_1d
16
17
  from .broadcast_to import TensorBroadcastTo, broadcast_to
17
18
  from .ravel import ravel
18
19
  from .transpose import transpose
20
+ from .unique import unique
19
21
  from .where import TensorWhere, where
20
22
 
21
23
 
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # Copyright 1999-2021 Alibaba Group Holding Ltd.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ import numpy as np
18
+
19
+ from ...core import ExecutableTuple
20
+ from ..datasource import tensor as astensor
21
+
22
+
23
def atleast_1d(*tensors):
    """
    Convert inputs to tensors with at least one dimension.

    Zero-dimensional inputs gain a leading axis and become 1-dimensional
    tensors; inputs that already have one or more dimensions are kept as
    they are.

    Parameters
    ----------
    tensors1, tensors2, ... : array_like
        One or more input tensors.

    Returns
    -------
    ret : Tensor
        A tensor when a single input is given, otherwise a collection of
        tensors, each with ``a.ndim >= 1``.
        Copies are made only if necessary.

    See Also
    --------
    atleast_2d, atleast_3d

    Examples
    --------
    >>> import maxframe.tensor as mt

    >>> mt.atleast_1d(1.0).execute()
    array([ 1.])

    >>> x = mt.arange(9.0).reshape(3,3)
    >>> mt.atleast_1d(x).execute()
    array([[ 0.,  1.,  2.],
           [ 3.,  4.,  5.],
           [ 6.,  7.,  8.]])
    >>> mt.atleast_1d(x) is x
    True

    >>> mt.atleast_1d(1, [3, 4]).execute()
    [array([1]), array([3, 4])]

    """

    def _promote(value):
        # Wrap as a tensor first, then add an axis only for 0-d tensors.
        wrapped = astensor(value)
        return wrapped[np.newaxis] if wrapped.ndim == 0 else wrapped

    promoted = [_promote(value) for value in tensors]
    if len(promoted) == 1:
        return promoted[0]
    return ExecutableTuple(promoted)
@@ -0,0 +1,205 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import numpy as np
17
+
18
+ from ... import opcodes as OperandDef
19
+ from ...serialization.serializables import BoolField, Int32Field
20
+ from ..core import TensorOrder
21
+ from ..operators import TensorHasInput, TensorOperatorMixin
22
+ from ..utils import validate_axis
23
+
24
+
25
class TensorUnique(TensorHasInput, TensorOperatorMixin):
    """
    Operator that finds the unique elements of a tensor along one axis.

    Besides the tensor of unique values, one extra output is produced for
    each enabled ``return_*`` flag, in the order: indices, inverse, counts.
    """

    _op_type_ = OperandDef.UNIQUE

    return_index = BoolField("return_index", default=False)
    return_inverse = BoolField("return_inverse", default=False)
    return_counts = BoolField("return_counts", default=False)
    axis = Int32Field("axis", default=None)

    @property
    def output_limit(self):
        """Number of outputs: the unique tensor plus one per enabled flag."""
        # A fixed value of 1 contradicts _gen_kws, which describes up to
        # four outputs, so the limit is derived from the flags instead.
        return (
            1
            + bool(self.return_index)
            + bool(self.return_inverse)
            + bool(self.return_counts)
        )

    def _gen_kws(self, input_obj, chunk=False, chunk_index=None):
        """
        Build the keyword arguments describing every output of the operator.

        Parameters
        ----------
        input_obj
            Input tensor (or chunk); its ``shape``, ``dtype`` and ``op.gpu``
            are read. ``self.axis`` must already be resolved to an integer.
        chunk : bool
            When true, an ``index`` entry is added to every output.
        chunk_index : int, optional
            Position used in the ``index`` entries; treated as 0 when falsy.

        Returns
        -------
        list of dict
            One dict for the unique tensor, followed by one dict for each
            enabled ``return_index`` / ``return_inverse`` / ``return_counts``.
        """
        gpu = input_obj.op.gpu
        position = chunk_index or 0
        kws = []

        # unique tensor: length along the unique axis is unknown in advance
        shape = list(input_obj.shape)
        shape[self.axis] = np.nan
        kw = {"shape": tuple(shape), "dtype": input_obj.dtype, "gpu": gpu}
        if chunk:
            idx = [0] * len(shape)
            idx[self.axis] = position
            kw["index"] = tuple(idx)
        kws.append(kw)

        # 1-d auxiliary outputs: (enabled, shape, dtype, type tag)
        auxiliary = [
            (self.return_index, (np.nan,), np.dtype(np.intp), "indices"),
            (
                self.return_inverse,
                (input_obj.shape[self.axis],),
                np.dtype(np.intp),
                "inverse",
            ),
            (self.return_counts, (np.nan,), np.dtype(np.int_), "counts"),
        ]
        for enabled, aux_shape, aux_dtype, aux_type in auxiliary:
            if not enabled:
                continue
            kw = {
                "shape": aux_shape,
                "dtype": aux_dtype,
                "gpu": gpu,
                "type": aux_type,
            }
            if chunk:
                kw["index"] = (position,)
            kws.append(kw)

        return kws

    def __call__(self, ar):
        """
        Apply the operator to ``ar``.

        The input is promoted to at least one dimension. When no axis was
        given, inputs with more than one dimension are flattened and axis 0
        is used; otherwise the axis is validated against the input's ndim.

        Returns
        -------
        A single tensor when only the unique values are requested, otherwise
        the collection of output tensors.
        """
        from .atleast_1d import atleast_1d

        ar = atleast_1d(ar)
        # The resolved axis is stored on the declared ``axis`` field, which
        # is what _gen_kws reads.
        if self.axis is None:
            if ar.ndim > 1:
                ar = ar.flatten()
            self.axis = 0
        else:
            self.axis = validate_axis(ar.ndim, self.axis)

        # _gen_kws is an instance method: pass only the input tensor.
        kws = self._gen_kws(ar)
        tensors = self.new_tensors([ar], kws=kws, order=TensorOrder.C_ORDER)
        if len(tensors) == 1:
            return tensors[0]
        return tensors
104
+
105
+
106
def unique(
    ar,
    return_index=False,
    return_inverse=False,
    return_counts=False,
    axis=None,
):
    """
    Find the unique elements of a tensor.

    Returns the sorted unique elements of a tensor. Up to three optional
    outputs may be requested in addition to the unique elements:

    * the indices of the input tensor that give the unique values
    * the indices of the unique tensor that reconstruct the input tensor
    * the number of times each unique value comes up in the input tensor

    Parameters
    ----------
    ar : array_like
        Input tensor. Unless `axis` is specified, this will be flattened if
        it is not already 1-D.
    return_index : bool, optional
        If True, also return the indices of `ar` (along the specified axis,
        if provided, or in the flattened tensor) that result in the unique
        tensor.
    return_inverse : bool, optional
        If True, also return the indices of the unique tensor (for the
        specified axis, if provided) that can be used to reconstruct `ar`.
    return_counts : bool, optional
        If True, also return the number of times each unique item appears
        in `ar`.
    axis : int or None, optional
        The axis to operate on. If None, `ar` will be flattened. If an
        integer, the subarrays indexed by the given axis will be flattened
        and treated as the elements of a 1-D tensor with the dimension of
        the given axis. Object tensors or structured tensors that contain
        objects are not supported if the `axis` kwarg is used. The default
        is None.

    Returns
    -------
    unique : Tensor
        The sorted unique values.
    unique_indices : Tensor, optional
        The indices of the first occurrences of the unique values in the
        original tensor. Only provided if `return_index` is True.
    unique_inverse : Tensor, optional
        The indices to reconstruct the original tensor from the
        unique tensor. Only provided if `return_inverse` is True.
    unique_counts : Tensor, optional
        The number of times each of the unique values comes up in the
        original tensor. Only provided if `return_counts` is True.

    Examples
    --------
    >>> import maxframe.tensor as mt

    >>> mt.unique([1, 1, 2, 2, 3, 3]).execute()
    array([1, 2, 3])
    >>> a = mt.array([[1, 1], [2, 3]])
    >>> mt.unique(a).execute()
    array([1, 2, 3])

    Return the unique rows of a 2D tensor

    >>> a = mt.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> mt.unique(a, axis=0).execute()
    array([[1, 0, 0], [2, 3, 4]])

    Return the indices of the original tensor that give the unique values:

    >>> a = mt.array(['a', 'b', 'b', 'c', 'a'])
    >>> u, indices = mt.unique(a, return_index=True)
    >>> u.execute()
    array(['a', 'b', 'c'],
           dtype='|S1')
    >>> indices.execute()
    array([0, 1, 3])
    >>> a[indices].execute()
    array(['a', 'b', 'c'],
           dtype='|S1')

    Reconstruct the input array from the unique values:

    >>> a = mt.array([1, 2, 6, 4, 2, 3, 2])
    >>> u, indices = mt.unique(a, return_inverse=True)
    >>> u.execute()
    array([1, 2, 3, 4, 6])
    >>> indices.execute()
    array([0, 1, 4, 3, 1, 2, 1])
    >>> u[indices].execute()
    array([1, 2, 6, 4, 2, 3, 2])
    """
    # Configure the operator from the flags, then apply it to the input.
    return TensorUnique(
        return_index=return_index,
        return_inverse=return_inverse,
        return_counts=return_counts,
        axis=axis,
    )(ar)
@@ -20,6 +20,7 @@ from ...serialization.serializables import (
20
20
  AnyField,
21
21
  FieldTypes,
22
22
  NDArrayField,
23
+ StringField,
23
24
  TupleField,
24
25
  )
25
26
  from ...utils import on_deserialize_shape, on_serialize_shape
@@ -37,8 +38,9 @@ class ArrayDataSource(TensorNoInput):
37
38
 
38
39
  _op_type_ = opcodes.TENSOR_DATA_SOURCE
39
40
 
40
- data = NDArrayField("data")
41
- chunk_size = AnyField("chunk_size")
41
+ data = NDArrayField("data", default=None)
42
+ chunk_size = AnyField("chunk_size", default=None)
43
+ order = StringField("order", default=None)
42
44
 
43
45
  def __init__(self, data=None, dtype=None, gpu=None, **kw):
44
46
  if dtype is not None:
@@ -33,7 +33,7 @@ class Scalar(TensorNoInput):
33
33
  def scalar(data, dtype=None, gpu=None):
34
34
  try:
35
35
  arr = np.array(data, dtype=dtype)
36
- op = Scalar(arr, dtype=arr.dtype, gpu=gpu)
36
+ op = Scalar(data=arr, dtype=arr.dtype, gpu=gpu)
37
37
  shape = ()
38
38
  return op(shape)
39
39
  except ValueError: