maxframe 1.0.0rc2-cp310-cp310-win_amd64.whl → 1.0.0rc3-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of maxframe might be problematic.

Files changed (106)
  1. maxframe/_utils.cp310-win_amd64.pyd +0 -0
  2. maxframe/codegen.py +3 -2
  3. maxframe/config/config.py +16 -9
  4. maxframe/config/validators.py +42 -12
  5. maxframe/conftest.py +13 -2
  6. maxframe/core/__init__.py +2 -13
  7. maxframe/core/entity/__init__.py +0 -4
  8. maxframe/core/entity/objects.py +45 -2
  9. maxframe/core/entity/output_types.py +0 -3
  10. maxframe/core/entity/tests/test_objects.py +43 -0
  11. maxframe/core/entity/tileables.py +5 -78
  12. maxframe/core/graph/__init__.py +2 -2
  13. maxframe/core/graph/builder/__init__.py +0 -1
  14. maxframe/core/graph/builder/base.py +5 -4
  15. maxframe/core/graph/builder/tileable.py +4 -4
  16. maxframe/core/graph/builder/utils.py +4 -8
  17. maxframe/core/graph/core.cp310-win_amd64.pyd +0 -0
  18. maxframe/core/graph/entity.py +9 -33
  19. maxframe/core/operator/__init__.py +2 -9
  20. maxframe/core/operator/base.py +3 -5
  21. maxframe/core/operator/objects.py +0 -9
  22. maxframe/core/operator/utils.py +55 -0
  23. maxframe/dataframe/datasource/read_odps_query.py +1 -1
  24. maxframe/dataframe/datasource/read_odps_table.py +1 -1
  25. maxframe/dataframe/datastore/to_odps.py +1 -1
  26. maxframe/dataframe/operators.py +1 -17
  27. maxframe/dataframe/reduction/core.py +2 -2
  28. maxframe/io/objects/__init__.py +24 -0
  29. maxframe/io/objects/core.py +140 -0
  30. maxframe/io/objects/tensor.py +76 -0
  31. maxframe/io/objects/tests/__init__.py +13 -0
  32. maxframe/io/objects/tests/test_object_io.py +97 -0
  33. maxframe/{odpsio → io/odpsio}/__init__.py +2 -0
  34. maxframe/{odpsio → io/odpsio}/arrow.py +4 -4
  35. maxframe/{odpsio → io/odpsio}/schema.py +5 -5
  36. maxframe/{odpsio → io/odpsio}/tableio.py +10 -4
  37. maxframe/io/odpsio/tests/__init__.py +13 -0
  38. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +3 -3
  39. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +3 -3
  40. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  41. maxframe/io/odpsio/volumeio.py +57 -0
  42. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  43. maxframe/learn/contrib/xgboost/core.py +87 -2
  44. maxframe/learn/contrib/xgboost/dmatrix.py +1 -4
  45. maxframe/learn/contrib/xgboost/predict.py +19 -5
  46. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  47. maxframe/learn/contrib/xgboost/train.py +25 -15
  48. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  49. maxframe/lib/mmh3.cp310-win_amd64.pyd +0 -0
  50. maxframe/protocol.py +1 -15
  51. maxframe/remote/core.py +4 -8
  52. maxframe/serialization/__init__.py +1 -0
  53. maxframe/serialization/core.cp310-win_amd64.pyd +0 -0
  54. maxframe/tensor/__init__.py +10 -2
  55. maxframe/tensor/arithmetic/isclose.py +1 -0
  56. maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
  57. maxframe/tensor/core.py +5 -136
  58. maxframe/tensor/datasource/array.py +3 -0
  59. maxframe/tensor/datasource/full.py +1 -1
  60. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  61. maxframe/tensor/indexing/flatnonzero.py +1 -1
  62. maxframe/tensor/merge/__init__.py +2 -0
  63. maxframe/tensor/merge/concatenate.py +98 -0
  64. maxframe/tensor/merge/tests/test_merge.py +30 -1
  65. maxframe/tensor/merge/vstack.py +70 -0
  66. maxframe/tensor/{base → misc}/__init__.py +2 -0
  67. maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
  68. maxframe/tensor/misc/atleast_2d.py +70 -0
  69. maxframe/tensor/misc/atleast_3d.py +85 -0
  70. maxframe/tensor/misc/tests/__init__.py +13 -0
  71. maxframe/tensor/{base → misc}/transpose.py +22 -18
  72. maxframe/tensor/operators.py +1 -7
  73. maxframe/tensor/random/core.py +1 -1
  74. maxframe/tensor/reduction/count_nonzero.py +1 -0
  75. maxframe/tensor/reduction/mean.py +1 -0
  76. maxframe/tensor/reduction/nanmean.py +1 -0
  77. maxframe/tensor/reduction/nanvar.py +2 -0
  78. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  79. maxframe/tensor/reduction/var.py +2 -0
  80. maxframe/tensor/utils.py +2 -22
  81. maxframe/typing_.py +4 -1
  82. maxframe/udf.py +8 -9
  83. maxframe/utils.py +15 -61
  84. maxframe-1.0.0rc3.dist-info/METADATA +104 -0
  85. {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +101 -91
  86. {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +1 -1
  87. maxframe_client/fetcher.py +23 -42
  88. maxframe_client/session/graph.py +8 -2
  89. maxframe_client/session/odps.py +54 -18
  90. maxframe_client/tests/test_fetcher.py +1 -1
  91. maxframe_client/tests/test_session.py +14 -2
  92. maxframe/core/entity/chunks.py +0 -68
  93. maxframe/core/entity/fuse.py +0 -73
  94. maxframe/core/graph/builder/chunk.py +0 -430
  95. maxframe/odpsio/volumeio.py +0 -95
  96. maxframe-1.0.0rc2.dist-info/METADATA +0 -177
  97. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  98. /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
  99. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  100. /maxframe/tensor/{base → misc}/astype.py +0 -0
  101. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  102. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  103. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  104. /maxframe/tensor/{base → misc}/unique.py +0 -0
  105. /maxframe/tensor/{base → misc}/where.py +0 -0
  106. {maxframe-1.0.0rc2.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,85 @@
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+
+ import numpy as np
+
+ from ...core import ExecutableTuple
+ from ..datasource import tensor as astensor
+
+
+ def atleast_3d(*tensors):
+     """
+     View inputs as tensors with at least three dimensions.
+
+     Parameters
+     ----------
+     tensors1, tensors2, ... : array_like
+         One or more tensor-like sequences. Non-tensor inputs are converted to
+         tensors. Tensors that already have three or more dimensions are
+         preserved.
+
+     Returns
+     -------
+     res1, res2, ... : Tensor
+         A tensor, or list of tensors, each with ``a.ndim >= 3``. Copies are
+         avoided where possible, and views with three or more dimensions are
+         returned. For example, a 1-D tensor of shape ``(N,)`` becomes a view
+         of shape ``(1, N, 1)``, and a 2-D tensor of shape ``(M, N)`` becomes a
+         view of shape ``(M, N, 1)``.
+
+     See Also
+     --------
+     atleast_1d, atleast_2d
+
+     Examples
+     --------
+     >>> import maxframe.tensor as mt
+
+     >>> mt.atleast_3d(3.0).execute()
+     array([[[ 3.]]])
+
+     >>> x = mt.arange(3.0)
+     >>> mt.atleast_3d(x).shape
+     (1, 3, 1)
+
+     >>> x = mt.arange(12.0).reshape(4,3)
+     >>> mt.atleast_3d(x).shape
+     (4, 3, 1)
+
+     >>> for arr in mt.atleast_3d([1, 2], [[1, 2]], [[[1, 2]]]).execute():
+     ...     print(arr, arr.shape)
+     ...
+     [[[1]
+       [2]]] (1, 2, 1)
+     [[[1]
+       [2]]] (1, 2, 1)
+     [[[1 2]]] (1, 1, 2)
+
+     """
+     new_tensors = []
+     for x in tensors:
+         x = astensor(x)
+         if x.ndim == 0:
+             x = x[np.newaxis, np.newaxis, np.newaxis]
+         elif x.ndim == 1:
+             x = x[np.newaxis, :, np.newaxis]
+         elif x.ndim == 2:
+             x = x[:, :, None]
+
+         new_tensors.append(x)
+
+     if len(new_tensors) == 1:
+         return new_tensors[0]
+     return ExecutableTuple(new_tensors)
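The shape promotion above mirrors NumPy's own ``atleast_3d``; as a quick reference (plain NumPy, not MaxFrame), the resulting shapes are:

    import numpy as np

    # 0-D -> (1, 1, 1); 1-D (N,) -> (1, N, 1); 2-D (M, N) -> (M, N, 1)
    print(np.atleast_3d(3.0).shape)              # (1, 1, 1)
    print(np.atleast_3d(np.arange(3.0)).shape)   # (1, 3, 1)
    print(np.atleast_3d(np.ones((4, 3))).shape)  # (4, 3, 1)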
@@ -0,0 +1,13 @@
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
@@ -1,5 +1,3 @@
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
  # Copyright 1999-2024 Alibaba Group Holding Ltd.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -66,33 +64,39 @@ class TensorTranspose(TensorHasInput, TensorOperatorMixin):

  def transpose(a, axes=None):
      """
-     Permute the dimensions of a tensor.
+     Returns an array with axes transposed.
+
+     For a 1-D array, this returns an unchanged view of the original array, as a
+     transposed vector is simply the same vector.
+     To convert a 1-D array into a 2-D column vector, an additional dimension
+     must be added, e.g., ``mt.atleast_2d(a).T`` achieves this, as does
+     ``a[:, mt.newaxis]``.
+     For a 2-D array, this is the standard matrix transpose.
+     For an n-D array, if axes are given, their order indicates how the
+     axes are permuted (see Examples). If axes are not provided, then
+     ``transpose(a).shape == a.shape[::-1]``.

      Parameters
      ----------
      a : array_like
-         Input tensor.
-     axes : list of ints, optional
-         By default, reverse the dimensions, otherwise permute the axes
-         according to the values given.
+         Input array.
+     axes : tuple or list of ints, optional
+         If specified, it must be a tuple or list which contains a permutation
+         of [0,1,...,N-1] where N is the number of axes of `a`. The `i`'th axis
+         of the returned array will correspond to the axis numbered ``axes[i]``
+         of the input. If not specified, defaults to ``range(a.ndim)[::-1]``,
+         which reverses the order of the axes.

      Returns
      -------
-     p : Tensor
-         `a` with its axes permuted. A view is returned whenever
-         possible.
-
-     See Also
-     --------
-     moveaxis
-     argsort
+     p : ndarray
+         `a` with its axes permuted. A view is returned whenever possible.

      Notes
      -----
-     Use `transpose(a, argsort(axes))` to invert the transposition of tensors
+     Use ``transpose(a, argsort(axes))`` to invert the transposition of tensors
      when using the `axes` keyword argument.

-     Transposing a 1-D array returns an unchanged view of the original tensor.

      Examples
      --------
@@ -121,5 +125,5 @@ def transpose(a, axes=None):
          axes = list(range(a.ndim))[::-1]
      else:
          axes = list(axes)
-     op = TensorTranspose(axes, dtype=a.dtype, sparse=a.issparse())
+     op = TensorTranspose(axes)
      return op(a)
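The Examples section of the rewritten docstring is not included in this hunk; a small NumPy-equivalent sketch of the ``axes`` semantics described above (the MaxFrame tensor API documents the same behavior):

    import numpy as np

    a = np.ones((1, 2, 3))
    # axes=(1, 0, 2): output axis i takes the input axis numbered axes[i]
    print(np.transpose(a, (1, 0, 2)).shape)  # (2, 1, 3)
    # the default reverses the axis order, i.e. a.shape[::-1]
    print(np.transpose(a).shape)             # (3, 2, 1)
    # argsort(axes) inverts the permutation, as stated in the Notes section
    inv = np.argsort((1, 0, 2))
    print(np.transpose(np.transpose(a, (1, 0, 2)), inv).shape)  # (1, 2, 3)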
@@ -12,9 +12,9 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

+
  from ..core import OutputType
  from ..core.operator import (
-     Fuse,
      HasInput,
      MapReduceOperator,
      Operator,
@@ -115,9 +115,3 @@ class TensorMapReduceOperator(MapReduceOperator):
      _output_type_ = OutputType.tensor

      dtype = DataTypeField("dtype", default=None)
-
-
- class TensorFuse(Fuse):
-     _output_type_ = OutputType.tensor
-
-     dtype = DataTypeField("dtype", default=None)
@@ -19,9 +19,9 @@ from contextlib import contextmanager
  import numpy as np

  from ...serialization.serializables import FieldTypes, Int32Field, TupleField
- from ..base import broadcast_to
  from ..core import TENSOR_TYPE
  from ..datasource import tensor as astensor
+ from ..misc import broadcast_to
  from ..operators import TensorMapReduceOperator, TensorOperator, TensorOperatorMixin
  from ..utils import broadcast_shape

@@ -22,6 +22,7 @@ from .core import TensorReduction, TensorReductionMixin

  class TensorCountNonzero(TensorReduction, TensorReductionMixin):
      _op_type_ = opcodes.COUNT_NONZERO
+     _func_name = "count_nonzero"

      def __init__(self, dtype=None, **kw):
          if dtype is None:
@@ -23,6 +23,7 @@ from .core import TensorReduction, TensorReductionMixin

  class TensorMean(TensorReduction, TensorReductionMixin):
      _op_type_ = opcodes.MEAN
+     _func_name = "mean"


  def mean(a, axis=None, dtype=None, out=None, keepdims=None):
@@ -23,6 +23,7 @@ from .core import TensorReduction, TensorReductionMixin

  class TensorNanMean(TensorReduction, TensorReductionMixin):
      _op_type_ = opcodes.NANMEAN
+     _func_name = "nanmean"


  def nanmean(a, axis=None, dtype=None, out=None, keepdims=None):
@@ -24,6 +24,7 @@ from .core import TensorReduction, TensorReductionMixin

  class TensorNanMoment(TensorReduction, TensorReductionMixin):
      _op_type_ = opcodes.NANMOMENT
+     _func_name = "nanvar"

      moment = Int32Field("moment", default=2)
      ddof = Int32Field("ddof", default=None)
@@ -36,6 +37,7 @@ class TensorNanMoment(TensorReduction, TensorReductionMixin):

  class TensorNanVar(TensorReduction, TensorReductionMixin):
      _op_type_ = opcodes.NANVAR
+     _func_name = "nanvar"

      ddof = Int32Field("ddof", default=0)

@@ -17,8 +17,11 @@
  import numpy as np
  import pytest

+ from maxframe.tensor.reduction.core import TensorReduction
+
+ from ....utils import collect_leaf_operators
  from ...datasource import ones, tensor
- from .. import all
+ from .. import *  # noqa: F401


  def test_base_reduction():
@@ -179,3 +182,11 @@ def test_var_reduction():

      res1 = var(ones((10, 8, 8), chunk_size=3), axis=1)
      assert res1.shape == (10, 8)
+
+
+ def test_reduction_op_func_name():
+     # make sure all the binary op has defined the func name.
+
+     results = collect_leaf_operators(TensorReduction)
+     for op_type in results:
+         assert hasattr(op_type, "_func_name")
@@ -42,6 +42,7 @@ def reduce_var_square(var_square, avg_diff, count, op, axis, sum_func):

  class TensorMoment(TensorReduction, TensorReductionMixin):
      _op_type_ = opcodes.MOMENT
+     _func_name = "var"

      moment = Int32Field("moment", default=2)
      ddof = Int32Field("ddof", default=None)
@@ -54,6 +55,7 @@ class TensorMoment(TensorReduction, TensorReductionMixin):

  class TensorVar(TensorReduction, TensorReductionMixin):
      _op_type_ = opcodes.VAR
+     _func_name = "var"

      ddof = Int32Field("ddof", default=0)

maxframe/tensor/utils.py CHANGED
@@ -19,18 +19,13 @@ import itertools
  import operator
  from collections import OrderedDict
  from collections.abc import Iterable
- from functools import lru_cache, wraps
+ from functools import wraps
  from math import ceil
  from numbers import Integral
  from typing import Dict, List, Union

  import numpy as np

- try:
-     import tiledb
- except (ImportError, OSError):  # pragma: no cover
-     tildb = None
-
  from ..core import ExecutableTuple
  from ..lib.mmh3 import hash_from_buffer
  from ..utils import lazy_import
@@ -508,7 +503,7 @@ def decide_unify_split(*splits):


  def check_out_param(out, t, casting):
-     from .base import broadcast_to
+     from .misc import broadcast_to

      if not hasattr(out, "shape"):
          raise TypeError("return arrays must be a tensor")
@@ -563,21 +558,6 @@ def filter_inputs(inputs):
      return [inp for inp in inputs if isinstance(inp, ENTITY_TYPE)]


- # As TileDB Ctx's creation is a bit time-consuming,
- # we just cache the Ctx
- # also remember the arguments should be hashable
- @lru_cache(10)
- def _create_tiledb_ctx(conf_tuple):
-     if conf_tuple is not None:
-         return tiledb.Ctx(dict(conf_tuple))
-     return tiledb.Ctx()
-
-
- def get_tiledb_ctx(conf):
-     key = tuple(conf.items()) if conf is not None else None
-     return _create_tiledb_ctx(key)
-
-
  # this function is only used for pandas' compatibility
  def to_numpy(pdf):
      try:
maxframe/typing_.py CHANGED
@@ -12,11 +12,14 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- from typing import TypeVar, Union
+ from numbers import Integral
+ from typing import List, TypeVar, Union

  import pandas as pd
  import pyarrow as pa

+ SlicesType = List[Union[None, Integral, slice]]
+
  TimeoutType = Union[int, float, None]


maxframe/udf.py CHANGED
@@ -29,28 +29,25 @@ from .utils import tokenize


  class PythonPackOptions(Serializable):
+     _key_args = ("force_rebuild", "prefer_binary", "pre_release", "no_audit_wheel")
+
      key = StringField("key")
      requirements = ListField("requirements", FieldTypes.string, default_factory=list)
      force_rebuild = BoolField("force_rebuild", default=False)
      prefer_binary = BoolField("prefer_binary", default=False)
      pre_release = BoolField("pre_release", default=False)
      pack_instance_id = StringField("pack_instance_id", default=None)
+     no_audit_wheel = BoolField("no_audit_wheel", default=False)

      def __init__(self, key: str = None, **kw):
          super().__init__(key=key, **kw)
          if self.key is None:
-             args = {
-                 "force_rebuild": self.force_rebuild,
-                 "prefer_binary": self.prefer_binary,
-                 "pre_release": self.pre_release,
-             }
+             args = {k: getattr(self, k) for k in self._key_args}
              self.key = tokenize(set(self.requirements), args)

      def __repr__(self):
-         return (
-             f"<PythonPackOptions {self.requirements} force_rebuild={self.force_rebuild} "
-             f"prefer_binary={self.prefer_binary} pre_release={self.pre_release}>"
-         )
+         args_str = " ".join(f"{k}={getattr(self, k)}" for k in self._key_args)
+         return f"<PythonPackOptions {self.requirements} {args_str}>"


  class MarkedFunction(Serializable):
@@ -101,6 +98,7 @@ def with_python_requirements(
      force_rebuild: bool = False,
      prefer_binary: bool = False,
      pre_release: bool = False,
+     no_audit_wheel: bool = False,
  ):
      result_req = []
      for req in requirements:
@@ -112,6 +110,7 @@
              force_rebuild=force_rebuild,
              prefer_binary=prefer_binary,
              pre_release=pre_release,
+             no_audit_wheel=no_audit_wheel,
          )
      if isinstance(func, MarkedFunction):
          func.pythonpacks.append(pack_item)
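A sketch of how the new ``no_audit_wheel`` flag would be passed through ``with_python_requirements`` (the requirement name below is hypothetical, and decorator-style usage is assumed from the signature above):

    from maxframe.udf import with_python_requirements

    # the flag is forwarded into PythonPackOptions and now also feeds its tokenized key
    @with_python_requirements("some-package>=1.0", prefer_binary=True, no_audit_wheel=True)
    def my_udf(value):
        return value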
maxframe/utils.py CHANGED
@@ -19,7 +19,6 @@ import dataclasses
  import datetime
  import enum
  import functools
- import hashlib
  import importlib
  import inspect
  import io
@@ -75,7 +74,7 @@ from ._utils import ( # noqa: F401 # pylint: disable=unused-import
      tokenize_int,
  )
  from .lib.version import parse as parse_version
- from .typing_ import ChunkType, EntityType, TileableType, TimeoutType
+ from .typing_ import TileableType, TimeoutType

  # make flake8 happy by referencing these imports
  NamedType = NamedType
@@ -245,58 +244,6 @@ def copy_tileables(tileables: List[TileableType], **kwargs):
      return op.new_tileables(inputs, kws=kws, output_limit=len(kws))


- def build_fetch_chunk(chunk: ChunkType, **kwargs) -> ChunkType:
-     from .core.operator import ShuffleProxy
-
-     chunk_op = chunk.op
-     params = chunk.params.copy()
-     assert not isinstance(chunk_op, ShuffleProxy)
-     # for non-shuffle nodes, we build Fetch chunks
-     # to replace original chunk
-     op = chunk_op.get_fetch_op_cls(chunk)(sparse=chunk.op.sparse, gpu=chunk.op.gpu)
-     return op.new_chunk(
-         None,
-         is_broadcaster=chunk.is_broadcaster,
-         kws=[params],
-         _key=chunk.key,
-         **kwargs,
-     )
-
-
- def build_fetch_tileable(tileable: TileableType) -> TileableType:
-     if tileable.is_coarse():
-         chunks = None
-     else:
-         chunks = []
-         for c in tileable.chunks:
-             fetch_chunk = build_fetch_chunk(c, index=c.index)
-             chunks.append(fetch_chunk)
-
-     tileable_op = tileable.op
-     params = tileable.params.copy()
-
-     new_op = tileable_op.get_fetch_op_cls(tileable)(_id=tileable_op.id)
-     return new_op.new_tileables(
-         None,
-         chunks=chunks,
-         nsplits=tileable.nsplits,
-         _key=tileable.key,
-         _id=tileable.id,
-         **params,
-     )[0]
-
-
- def build_fetch(entity: EntityType) -> EntityType:
-     from .core import CHUNK_TYPE, ENTITY_TYPE
-
-     if isinstance(entity, CHUNK_TYPE):
-         return build_fetch_chunk(entity)
-     elif isinstance(entity, ENTITY_TYPE):
-         return build_fetch_tileable(entity)
-     else:
-         raise TypeError(f"Type {type(entity)} not supported")
-
-
  def get_dtype(dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]):
      if pd.api.types.is_extension_array_dtype(dtype):
          return dtype
@@ -386,13 +333,7 @@ def build_temp_intermediate_table_name(session_id: str, tileable_key: str) -> str:


  def build_session_volume_name(session_id: str) -> str:
-     return f"mf_vol_{session_id}"
-
-
- def build_tileable_dir_name(tileable_key: str) -> str:
-     m = hashlib.md5()
-     m.update(f"mf_dir_{tileable_key}".encode())
-     return m.hexdigest()
+     return f"mf_vol_{session_id.replace('-', '_')}"


  async def wait_http_response(
@@ -1123,3 +1064,16 @@ def get_item_if_scalar(val: Any) -> Any:
      if isinstance(val, np.ndarray) and val.shape == ():
          return val.item()
      return val
+
+
+ def collect_leaf_operators(root) -> List[Type]:
+     result = []
+
+     def _collect(op_type):
+         if len(op_type.__subclasses__()) == 0:
+             result.append(op_type)
+         for subclass in op_type.__subclasses__():
+             _collect(subclass)
+
+     _collect(root)
+     return result
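The new ``collect_leaf_operators`` helper recursively walks ``__subclasses__()`` and keeps only classes with no further subclasses; the reduction test added above exercises it roughly as follows:

    from maxframe.tensor.reduction.core import TensorReduction
    from maxframe.utils import collect_leaf_operators

    # every concrete (leaf) reduction operator is expected to define _func_name
    for op_type in collect_leaf_operators(TensorReduction):
        assert hasattr(op_type, "_func_name"), op_type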
@@ -0,0 +1,104 @@
+ Metadata-Version: 2.1
+ Name: maxframe
+ Version: 1.0.0rc3
+ Summary: MaxFrame operator-based data analyze framework
+ Requires-Dist: numpy<2.0.0,>=1.19.0
+ Requires-Dist: pandas>=1.0.0
+ Requires-Dist: pyodps>=0.11.6.1
+ Requires-Dist: scipy>=1.0
+ Requires-Dist: pyarrow>=1.0.0
+ Requires-Dist: msgpack>=1.0.0
+ Requires-Dist: traitlets>=5.0
+ Requires-Dist: cloudpickle<3.0.0,>=1.5.0
+ Requires-Dist: pyyaml>=5.1
+ Requires-Dist: tornado>=6.0
+ Requires-Dist: defusedxml>=0.5.0
+ Requires-Dist: tqdm>=4.1.0
+ Requires-Dist: importlib-metadata>=1.4
+ Requires-Dist: pickle5; python_version < "3.8"
+ Provides-Extra: dev
+ Requires-Dist: black>=22.3.0; extra == "dev"
+ Requires-Dist: flake8>=5.0.4; extra == "dev"
+ Requires-Dist: pre-commit>=2.15.0; extra == "dev"
+ Requires-Dist: graphviz>=0.20.1; extra == "dev"
+ Provides-Extra: test
+ Requires-Dist: mock; extra == "test"
+ Requires-Dist: pytest>=7.3.1; extra == "test"
+ Requires-Dist: pytest-cov>=4.1.0; extra == "test"
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
+ Requires-Dist: pytest-timeout>=2.1.0; extra == "test"
+ Requires-Dist: matplotlib>=2.0.0; extra == "test"
+
+ MaxCompute MaxFrame Client
+ ==========================
+
+ MaxFrame is a computational framework created by Alibaba Cloud to
+ provide a way for Python developers to parallelize their code with
+ MaxCompute. It creates a runnable computation graph locally, submits it
+ to MaxCompute to execute and obtains results from MaxCompute.
+
+ MaxFrame client is the client of MaxFrame. Currently it provides a
+ DataFrame-based SDK with compatible APIs for pandas. In future, other
+ common Python libraries like numpy and scikit-learn will be added as
+ well. Python 3.7 is recommended for MaxFrame client to enable all
+ functionalities while supports for higher Python versions are on the
+ way.
+
+ Installation
+ ------------
+
+ You may install MaxFrame client through PIP:
+
+ .. code:: bash
+
+     pip install maxframe
+
+ Latest beta version can be installed with ``--pre`` argument:
+
+ .. code:: bash
+
+     pip install --pre maxframe
+
+ You can also install MaxFrame client from source code:
+
+ .. code:: bash
+
+     pip install git+https://github.com/aliyun/alibabacloud-odps-maxframe-client.git
+
+ Getting started
+ ---------------
+
+ We show a simple code example of MaxFrame client which read data from a
+ MaxCompute table, performs some simple data transform and writes back
+ into MaxCompute.
+
+ .. code:: python
+
+     import maxframe.dataframe as md
+     import os
+     from maxframe import new_session
+     from odps import ODPS
+
+     o = ODPS(
+         os.getenv('ALIBABA_CLOUD_ACCESS_KEY_ID'),
+         os.getenv('ALIBABA_CLOUD_ACCESS_KEY_SECRET'),
+         project='your-default-project',
+         endpoint='your-end-point',
+     )
+     session = new_session(o)
+
+     df = md.read_odps_table("source_table")
+     df["A"] = "prefix_" + df["A"]
+     md.to_odps_table(df, "prefix_source_table")
+
+ Documentation
+ -------------
+
+ Detailed documentations can be found
+ `here <https://maxframe.readthedocs.io>`__.
+
+ License
+ -------
+
+ Licensed under the `Apache License
+ 2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__.