maxframe 1.0.0rc1__cp311-cp311-macosx_10_9_universal2.whl → 1.0.0rc3__cp311-cp311-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (138)
  1. maxframe/_utils.cpython-311-darwin.so +0 -0
  2. maxframe/codegen.py +3 -6
  3. maxframe/config/config.py +49 -10
  4. maxframe/config/validators.py +42 -11
  5. maxframe/conftest.py +15 -2
  6. maxframe/core/__init__.py +2 -13
  7. maxframe/core/entity/__init__.py +0 -4
  8. maxframe/core/entity/objects.py +46 -3
  9. maxframe/core/entity/output_types.py +0 -3
  10. maxframe/core/entity/tests/test_objects.py +43 -0
  11. maxframe/core/entity/tileables.py +5 -78
  12. maxframe/core/graph/__init__.py +2 -2
  13. maxframe/core/graph/builder/__init__.py +0 -1
  14. maxframe/core/graph/builder/base.py +5 -4
  15. maxframe/core/graph/builder/tileable.py +4 -4
  16. maxframe/core/graph/builder/utils.py +4 -8
  17. maxframe/core/graph/core.cpython-311-darwin.so +0 -0
  18. maxframe/core/graph/entity.py +9 -33
  19. maxframe/core/operator/__init__.py +2 -9
  20. maxframe/core/operator/base.py +3 -5
  21. maxframe/core/operator/objects.py +0 -9
  22. maxframe/core/operator/utils.py +55 -0
  23. maxframe/dataframe/__init__.py +1 -1
  24. maxframe/dataframe/arithmetic/around.py +5 -17
  25. maxframe/dataframe/arithmetic/core.py +15 -7
  26. maxframe/dataframe/arithmetic/docstring.py +5 -55
  27. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
  28. maxframe/dataframe/core.py +5 -5
  29. maxframe/dataframe/datasource/date_range.py +2 -2
  30. maxframe/dataframe/datasource/read_odps_query.py +7 -1
  31. maxframe/dataframe/datasource/read_odps_table.py +3 -2
  32. maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
  33. maxframe/dataframe/datastore/to_odps.py +1 -1
  34. maxframe/dataframe/groupby/cum.py +0 -1
  35. maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
  36. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  37. maxframe/dataframe/indexing/rename.py +3 -37
  38. maxframe/dataframe/indexing/sample.py +0 -1
  39. maxframe/dataframe/indexing/set_index.py +68 -1
  40. maxframe/dataframe/merge/merge.py +236 -2
  41. maxframe/dataframe/merge/tests/test_merge.py +123 -0
  42. maxframe/dataframe/misc/apply.py +3 -10
  43. maxframe/dataframe/misc/case_when.py +1 -1
  44. maxframe/dataframe/misc/describe.py +2 -2
  45. maxframe/dataframe/misc/drop_duplicates.py +4 -25
  46. maxframe/dataframe/misc/eval.py +4 -0
  47. maxframe/dataframe/misc/pct_change.py +1 -83
  48. maxframe/dataframe/misc/transform.py +1 -30
  49. maxframe/dataframe/misc/value_counts.py +4 -17
  50. maxframe/dataframe/missing/dropna.py +1 -1
  51. maxframe/dataframe/missing/fillna.py +5 -5
  52. maxframe/dataframe/operators.py +1 -17
  53. maxframe/dataframe/reduction/core.py +2 -2
  54. maxframe/dataframe/sort/sort_values.py +1 -11
  55. maxframe/dataframe/statistics/quantile.py +5 -17
  56. maxframe/dataframe/utils.py +4 -7
  57. maxframe/io/objects/__init__.py +24 -0
  58. maxframe/io/objects/core.py +140 -0
  59. maxframe/io/objects/tensor.py +76 -0
  60. maxframe/io/objects/tests/__init__.py +13 -0
  61. maxframe/io/objects/tests/test_object_io.py +97 -0
  62. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  63. maxframe/{odpsio → io/odpsio}/arrow.py +12 -8
  64. maxframe/{odpsio → io/odpsio}/schema.py +15 -12
  65. maxframe/io/odpsio/tableio.py +702 -0
  66. maxframe/io/odpsio/tests/__init__.py +13 -0
  67. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +19 -18
  68. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  69. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  70. maxframe/io/odpsio/volumeio.py +57 -0
  71. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  72. maxframe/learn/contrib/xgboost/core.py +87 -2
  73. maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
  74. maxframe/learn/contrib/xgboost/predict.py +21 -7
  75. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  76. maxframe/learn/contrib/xgboost/train.py +27 -17
  77. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  78. maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
  79. maxframe/protocol.py +41 -17
  80. maxframe/remote/core.py +4 -8
  81. maxframe/serialization/__init__.py +1 -0
  82. maxframe/serialization/core.cpython-311-darwin.so +0 -0
  83. maxframe/serialization/serializables/core.py +48 -9
  84. maxframe/tensor/__init__.py +69 -2
  85. maxframe/tensor/arithmetic/isclose.py +1 -0
  86. maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
  87. maxframe/tensor/core.py +5 -136
  88. maxframe/tensor/datasource/array.py +3 -0
  89. maxframe/tensor/datasource/full.py +1 -1
  90. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  91. maxframe/tensor/indexing/flatnonzero.py +1 -1
  92. maxframe/tensor/merge/__init__.py +2 -0
  93. maxframe/tensor/merge/concatenate.py +98 -0
  94. maxframe/tensor/merge/tests/test_merge.py +30 -1
  95. maxframe/tensor/merge/vstack.py +70 -0
  96. maxframe/tensor/{base → misc}/__init__.py +2 -0
  97. maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
  98. maxframe/tensor/misc/atleast_2d.py +70 -0
  99. maxframe/tensor/misc/atleast_3d.py +85 -0
  100. maxframe/tensor/misc/tests/__init__.py +13 -0
  101. maxframe/tensor/{base → misc}/transpose.py +22 -18
  102. maxframe/tensor/{base → misc}/unique.py +2 -2
  103. maxframe/tensor/operators.py +1 -7
  104. maxframe/tensor/random/core.py +1 -1
  105. maxframe/tensor/reduction/count_nonzero.py +1 -0
  106. maxframe/tensor/reduction/mean.py +1 -0
  107. maxframe/tensor/reduction/nanmean.py +1 -0
  108. maxframe/tensor/reduction/nanvar.py +2 -0
  109. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  110. maxframe/tensor/reduction/var.py +2 -0
  111. maxframe/tensor/statistics/quantile.py +2 -2
  112. maxframe/tensor/utils.py +2 -22
  113. maxframe/tests/utils.py +11 -2
  114. maxframe/typing_.py +4 -1
  115. maxframe/udf.py +8 -9
  116. maxframe/utils.py +32 -70
  117. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/METADATA +25 -25
  118. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +133 -123
  119. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +1 -1
  120. maxframe_client/fetcher.py +60 -68
  121. maxframe_client/session/graph.py +8 -2
  122. maxframe_client/session/odps.py +58 -22
  123. maxframe_client/tests/test_fetcher.py +21 -3
  124. maxframe_client/tests/test_session.py +27 -4
  125. maxframe/core/entity/chunks.py +0 -68
  126. maxframe/core/entity/fuse.py +0 -73
  127. maxframe/core/graph/builder/chunk.py +0 -430
  128. maxframe/odpsio/tableio.py +0 -322
  129. maxframe/odpsio/volumeio.py +0 -95
  130. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  131. /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
  132. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  133. /maxframe/tensor/{base → misc}/astype.py +0 -0
  134. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  135. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  136. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  137. /maxframe/tensor/{base → misc}/where.py +0 -0
  138. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2024 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -66,33 +64,39 @@ class TensorTranspose(TensorHasInput, TensorOperatorMixin):
66
64
 
67
65
  def transpose(a, axes=None):
68
66
  """
69
- Permute the dimensions of a tensor.
67
+ Returns an array with axes transposed.
68
+
69
+ For a 1-D array, this returns an unchanged view of the original array, as a
70
+ transposed vector is simply the same vector.
71
+ To convert a 1-D array into a 2-D column vector, an additional dimension
72
+ must be added, e.g., ``mt.atleast_2d(a).T`` achieves this, as does
73
+ ``a[:, mt.newaxis]``.
74
+ For a 2-D array, this is the standard matrix transpose.
75
+ For an n-D array, if axes are given, their order indicates how the
76
+ axes are permuted (see Examples). If axes are not provided, then
77
+ ``transpose(a).shape == a.shape[::-1]``.
70
78
 
71
79
  Parameters
72
80
  ----------
73
81
  a : array_like
74
- Input tensor.
75
- axes : list of ints, optional
76
- By default, reverse the dimensions, otherwise permute the axes
77
- according to the values given.
82
+ Input array.
83
+ axes : tuple or list of ints, optional
84
+ If specified, it must be a tuple or list which contains a permutation
85
+ of [0,1,...,N-1] where N is the number of axes of `a`. The `i`'th axis
86
+ of the returned array will correspond to the axis numbered ``axes[i]``
87
+ of the input. If not specified, defaults to ``range(a.ndim)[::-1]``,
88
+ which reverses the order of the axes.
78
89
 
79
90
  Returns
80
91
  -------
81
- p : Tensor
82
- `a` with its axes permuted. A view is returned whenever
83
- possible.
84
-
85
- See Also
86
- --------
87
- moveaxis
88
- argsort
92
+ p : ndarray
93
+ `a` with its axes permuted. A view is returned whenever possible.
89
94
 
90
95
  Notes
91
96
  -----
92
- Use `transpose(a, argsort(axes))` to invert the transposition of tensors
97
+ Use ``transpose(a, argsort(axes))`` to invert the transposition of tensors
93
98
  when using the `axes` keyword argument.
94
99
 
95
- Transposing a 1-D array returns an unchanged view of the original tensor.
96
100
 
97
101
  Examples
98
102
  --------
@@ -121,5 +125,5 @@ def transpose(a, axes=None):
121
125
  axes = list(range(a.ndim))[::-1]
122
126
  else:
123
127
  axes = list(axes)
124
- op = TensorTranspose(axes, dtype=a.dtype, sparse=a.issparse())
128
+ op = TensorTranspose(axes)
125
129
  return op(a)
@@ -15,7 +15,7 @@
15
15
 
16
16
  import numpy as np
17
17
 
18
- from ... import opcodes as OperandDef
18
+ from ... import opcodes
19
19
  from ...serialization.serializables import BoolField, Int32Field
20
20
  from ..core import TensorOrder
21
21
  from ..operators import TensorHasInput, TensorOperatorMixin
@@ -23,7 +23,7 @@ from ..utils import validate_axis
23
23
 
24
24
 
25
25
  class TensorUnique(TensorHasInput, TensorOperatorMixin):
26
- _op_type_ = OperandDef.UNIQUE
26
+ _op_type_ = opcodes.UNIQUE
27
27
 
28
28
  return_index = BoolField("return_index", default=False)
29
29
  return_inverse = BoolField("return_inverse", default=False)
@@ -12,9 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+
15
16
  from ..core import OutputType
16
17
  from ..core.operator import (
17
- Fuse,
18
18
  HasInput,
19
19
  MapReduceOperator,
20
20
  Operator,
@@ -115,9 +115,3 @@ class TensorMapReduceOperator(MapReduceOperator):
115
115
  _output_type_ = OutputType.tensor
116
116
 
117
117
  dtype = DataTypeField("dtype", default=None)
118
-
119
-
120
- class TensorFuse(Fuse):
121
- _output_type_ = OutputType.tensor
122
-
123
- dtype = DataTypeField("dtype", default=None)
@@ -19,9 +19,9 @@ from contextlib import contextmanager
19
19
  import numpy as np
20
20
 
21
21
  from ...serialization.serializables import FieldTypes, Int32Field, TupleField
22
- from ..base import broadcast_to
23
22
  from ..core import TENSOR_TYPE
24
23
  from ..datasource import tensor as astensor
24
+ from ..misc import broadcast_to
25
25
  from ..operators import TensorMapReduceOperator, TensorOperator, TensorOperatorMixin
26
26
  from ..utils import broadcast_shape
27
27
 
@@ -22,6 +22,7 @@ from .core import TensorReduction, TensorReductionMixin
22
22
 
23
23
  class TensorCountNonzero(TensorReduction, TensorReductionMixin):
24
24
  _op_type_ = opcodes.COUNT_NONZERO
25
+ _func_name = "count_nonzero"
25
26
 
26
27
  def __init__(self, dtype=None, **kw):
27
28
  if dtype is None:
@@ -23,6 +23,7 @@ from .core import TensorReduction, TensorReductionMixin
23
23
 
24
24
  class TensorMean(TensorReduction, TensorReductionMixin):
25
25
  _op_type_ = opcodes.MEAN
26
+ _func_name = "mean"
26
27
 
27
28
 
28
29
  def mean(a, axis=None, dtype=None, out=None, keepdims=None):
@@ -23,6 +23,7 @@ from .core import TensorReduction, TensorReductionMixin
23
23
 
24
24
  class TensorNanMean(TensorReduction, TensorReductionMixin):
25
25
  _op_type_ = opcodes.NANMEAN
26
+ _func_name = "nanmean"
26
27
 
27
28
 
28
29
  def nanmean(a, axis=None, dtype=None, out=None, keepdims=None):
@@ -24,6 +24,7 @@ from .core import TensorReduction, TensorReductionMixin
24
24
 
25
25
  class TensorNanMoment(TensorReduction, TensorReductionMixin):
26
26
  _op_type_ = opcodes.NANMOMENT
27
+ _func_name = "nanvar"
27
28
 
28
29
  moment = Int32Field("moment", default=2)
29
30
  ddof = Int32Field("ddof", default=None)
@@ -36,6 +37,7 @@ class TensorNanMoment(TensorReduction, TensorReductionMixin):
36
37
 
37
38
  class TensorNanVar(TensorReduction, TensorReductionMixin):
38
39
  _op_type_ = opcodes.NANVAR
40
+ _func_name = "nanvar"
39
41
 
40
42
  ddof = Int32Field("ddof", default=0)
41
43
 
@@ -17,8 +17,11 @@
17
17
  import numpy as np
18
18
  import pytest
19
19
 
20
+ from maxframe.tensor.reduction.core import TensorReduction
21
+
22
+ from ....utils import collect_leaf_operators
20
23
  from ...datasource import ones, tensor
21
- from .. import all
24
+ from .. import * # noqa: F401
22
25
 
23
26
 
24
27
  def test_base_reduction():
@@ -179,3 +182,11 @@ def test_var_reduction():
179
182
 
180
183
  res1 = var(ones((10, 8, 8), chunk_size=3), axis=1)
181
184
  assert res1.shape == (10, 8)
185
+
186
+
187
+ def test_reduction_op_func_name():
188
+ # make sure all the binary op has defined the func name.
189
+
190
+ results = collect_leaf_operators(TensorReduction)
191
+ for op_type in results:
192
+ assert hasattr(op_type, "_func_name")
@@ -42,6 +42,7 @@ def reduce_var_square(var_square, avg_diff, count, op, axis, sum_func):
42
42
 
43
43
  class TensorMoment(TensorReduction, TensorReductionMixin):
44
44
  _op_type_ = opcodes.MOMENT
45
+ _func_name = "var"
45
46
 
46
47
  moment = Int32Field("moment", default=2)
47
48
  ddof = Int32Field("ddof", default=None)
@@ -54,6 +55,7 @@ class TensorMoment(TensorReduction, TensorReductionMixin):
54
55
 
55
56
  class TensorVar(TensorReduction, TensorReductionMixin):
56
57
  _op_type_ = opcodes.VAR
58
+ _func_name = "var"
57
59
 
58
60
  ddof = Int32Field("ddof", default=0)
59
61
 
@@ -16,7 +16,7 @@ from collections.abc import Iterable
16
16
 
17
17
  import numpy as np
18
18
 
19
- from ... import opcodes as OperandDef
19
+ from ... import opcodes
20
20
  from ...core import ENTITY_TYPE
21
21
  from ...serialization.serializables import AnyField, BoolField, KeyField, StringField
22
22
  from ..core import TENSOR_TYPE, TensorOrder
@@ -43,7 +43,7 @@ q_error_msg = "Quantiles must be in the range [0, 1]"
43
43
 
44
44
  class TensorQuantile(TensorOperator, TensorOperatorMixin):
45
45
  __slots__ = ("q_error_msg",)
46
- _op_type_ = OperandDef.QUANTILE
46
+ _op_type_ = opcodes.QUANTILE
47
47
 
48
48
  a = KeyField("a")
49
49
  q = AnyField("q")
maxframe/tensor/utils.py CHANGED
@@ -19,18 +19,13 @@ import itertools
19
19
  import operator
20
20
  from collections import OrderedDict
21
21
  from collections.abc import Iterable
22
- from functools import lru_cache, wraps
22
+ from functools import wraps
23
23
  from math import ceil
24
24
  from numbers import Integral
25
25
  from typing import Dict, List, Union
26
26
 
27
27
  import numpy as np
28
28
 
29
- try:
30
- import tiledb
31
- except (ImportError, OSError): # pragma: no cover
32
- tildb = None
33
-
34
29
  from ..core import ExecutableTuple
35
30
  from ..lib.mmh3 import hash_from_buffer
36
31
  from ..utils import lazy_import
@@ -508,7 +503,7 @@ def decide_unify_split(*splits):
508
503
 
509
504
 
510
505
  def check_out_param(out, t, casting):
511
- from .base import broadcast_to
506
+ from .misc import broadcast_to
512
507
 
513
508
  if not hasattr(out, "shape"):
514
509
  raise TypeError("return arrays must be a tensor")
@@ -563,21 +558,6 @@ def filter_inputs(inputs):
563
558
  return [inp for inp in inputs if isinstance(inp, ENTITY_TYPE)]
564
559
 
565
560
 
566
- # As TileDB Ctx's creation is a bit time-consuming,
567
- # we just cache the Ctx
568
- # also remember the arguments should be hashable
569
- @lru_cache(10)
570
- def _create_tiledb_ctx(conf_tuple):
571
- if conf_tuple is not None:
572
- return tiledb.Ctx(dict(conf_tuple))
573
- return tiledb.Ctx()
574
-
575
-
576
- def get_tiledb_ctx(conf):
577
- key = tuple(conf.items()) if conf is not None else None
578
- return _create_tiledb_ctx(key)
579
-
580
-
581
561
  # this function is only used for pandas' compatibility
582
562
  def to_numpy(pdf):
583
563
  try:
maxframe/tests/utils.py CHANGED
@@ -14,6 +14,7 @@
14
14
 
15
15
  import asyncio
16
16
  import functools
17
+ import hashlib
17
18
  import os
18
19
  import queue
19
20
  import socket
@@ -25,7 +26,7 @@ import pytest
25
26
  from tornado import netutil
26
27
 
27
28
  from ..core import Tileable, TileableGraph
28
- from ..utils import create_event, lazy_import
29
+ from ..utils import create_sync_primitive, lazy_import, to_binary
29
30
 
30
31
  try:
31
32
  from flaky import flaky
@@ -102,7 +103,7 @@ def run_app_in_thread(app_func):
102
103
  def fixture_func(*args, **kwargs):
103
104
  app_loop = asyncio.new_event_loop()
104
105
  q = queue.Queue()
105
- exit_event = create_event(app_loop)
106
+ exit_event = create_sync_primitive(asyncio.Event, app_loop)
106
107
  app_thread = Thread(
107
108
  name="TestAppThread",
108
109
  target=app_thread_func,
@@ -162,3 +163,11 @@ def require_hadoop(func):
162
163
  not os.environ.get("WITH_HADOOP"), reason="Only run when hadoop is installed"
163
164
  )(func)
164
165
  return func
166
+
167
+
168
+ def get_test_unique_name(size=None):
169
+ test_name = os.getenv("PYTEST_CURRENT_TEST", "pyodps_test")
170
+ digest = hashlib.md5(to_binary(test_name)).hexdigest()
171
+ if size:
172
+ digest = digest[:size]
173
+ return digest + "_" + str(os.getpid())
maxframe/typing_.py CHANGED
@@ -12,11 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import TypeVar, Union
15
+ from numbers import Integral
16
+ from typing import List, TypeVar, Union
16
17
 
17
18
  import pandas as pd
18
19
  import pyarrow as pa
19
20
 
21
+ SlicesType = List[Union[None, Integral, slice]]
22
+
20
23
  TimeoutType = Union[int, float, None]
21
24
 
22
25
 
maxframe/udf.py CHANGED
@@ -29,28 +29,25 @@ from .utils import tokenize
29
29
 
30
30
 
31
31
  class PythonPackOptions(Serializable):
32
+ _key_args = ("force_rebuild", "prefer_binary", "pre_release", "no_audit_wheel")
33
+
32
34
  key = StringField("key")
33
35
  requirements = ListField("requirements", FieldTypes.string, default_factory=list)
34
36
  force_rebuild = BoolField("force_rebuild", default=False)
35
37
  prefer_binary = BoolField("prefer_binary", default=False)
36
38
  pre_release = BoolField("pre_release", default=False)
37
39
  pack_instance_id = StringField("pack_instance_id", default=None)
40
+ no_audit_wheel = BoolField("no_audit_wheel", default=False)
38
41
 
39
42
  def __init__(self, key: str = None, **kw):
40
43
  super().__init__(key=key, **kw)
41
44
  if self.key is None:
42
- args = {
43
- "force_rebuild": self.force_rebuild,
44
- "prefer_binary": self.prefer_binary,
45
- "pre_release": self.pre_release,
46
- }
45
+ args = {k: getattr(self, k) for k in self._key_args}
47
46
  self.key = tokenize(set(self.requirements), args)
48
47
 
49
48
  def __repr__(self):
50
- return (
51
- f"<PythonPackOptions {self.requirements} force_rebuild={self.force_rebuild} "
52
- f"prefer_binary={self.prefer_binary} pre_release={self.pre_release}>"
53
- )
49
+ args_str = " ".join(f"{k}={getattr(self, k)}" for k in self._key_args)
50
+ return f"<PythonPackOptions {self.requirements} {args_str}>"
54
51
 
55
52
 
56
53
  class MarkedFunction(Serializable):
@@ -101,6 +98,7 @@ def with_python_requirements(
101
98
  force_rebuild: bool = False,
102
99
  prefer_binary: bool = False,
103
100
  pre_release: bool = False,
101
+ no_audit_wheel: bool = False,
104
102
  ):
105
103
  result_req = []
106
104
  for req in requirements:
@@ -112,6 +110,7 @@ def with_python_requirements(
112
110
  force_rebuild=force_rebuild,
113
111
  prefer_binary=prefer_binary,
114
112
  pre_release=pre_release,
113
+ no_audit_wheel=no_audit_wheel,
115
114
  )
116
115
  if isinstance(func, MarkedFunction):
117
116
  func.pythonpacks.append(pack_item)
maxframe/utils.py CHANGED
@@ -19,7 +19,6 @@ import dataclasses
19
19
  import datetime
20
20
  import enum
21
21
  import functools
22
- import hashlib
23
22
  import importlib
24
23
  import inspect
25
24
  import io
@@ -75,7 +74,7 @@ from ._utils import ( # noqa: F401 # pylint: disable=unused-import
75
74
  tokenize_int,
76
75
  )
77
76
  from .lib.version import parse as parse_version
78
- from .typing_ import ChunkType, EntityType, TileableType, TimeoutType
77
+ from .typing_ import TileableType, TimeoutType
79
78
 
80
79
  # make flake8 happy by referencing these imports
81
80
  NamedType = NamedType
@@ -245,58 +244,6 @@ def copy_tileables(tileables: List[TileableType], **kwargs):
245
244
  return op.new_tileables(inputs, kws=kws, output_limit=len(kws))
246
245
 
247
246
 
248
- def build_fetch_chunk(chunk: ChunkType, **kwargs) -> ChunkType:
249
- from .core.operator import ShuffleProxy
250
-
251
- chunk_op = chunk.op
252
- params = chunk.params.copy()
253
- assert not isinstance(chunk_op, ShuffleProxy)
254
- # for non-shuffle nodes, we build Fetch chunks
255
- # to replace original chunk
256
- op = chunk_op.get_fetch_op_cls(chunk)(sparse=chunk.op.sparse, gpu=chunk.op.gpu)
257
- return op.new_chunk(
258
- None,
259
- is_broadcaster=chunk.is_broadcaster,
260
- kws=[params],
261
- _key=chunk.key,
262
- **kwargs,
263
- )
264
-
265
-
266
- def build_fetch_tileable(tileable: TileableType) -> TileableType:
267
- if tileable.is_coarse():
268
- chunks = None
269
- else:
270
- chunks = []
271
- for c in tileable.chunks:
272
- fetch_chunk = build_fetch_chunk(c, index=c.index)
273
- chunks.append(fetch_chunk)
274
-
275
- tileable_op = tileable.op
276
- params = tileable.params.copy()
277
-
278
- new_op = tileable_op.get_fetch_op_cls(tileable)(_id=tileable_op.id)
279
- return new_op.new_tileables(
280
- None,
281
- chunks=chunks,
282
- nsplits=tileable.nsplits,
283
- _key=tileable.key,
284
- _id=tileable.id,
285
- **params,
286
- )[0]
287
-
288
-
289
- def build_fetch(entity: EntityType) -> EntityType:
290
- from .core import CHUNK_TYPE, ENTITY_TYPE
291
-
292
- if isinstance(entity, CHUNK_TYPE):
293
- return build_fetch_chunk(entity)
294
- elif isinstance(entity, ENTITY_TYPE):
295
- return build_fetch_tileable(entity)
296
- else:
297
- raise TypeError(f"Type {type(entity)} not supported")
298
-
299
-
300
247
  def get_dtype(dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]):
301
248
  if pd.api.types.is_extension_array_dtype(dtype):
302
249
  return dtype
@@ -386,13 +333,7 @@ def build_temp_intermediate_table_name(session_id: str, tileable_key: str) -> st
386
333
 
387
334
 
388
335
  def build_session_volume_name(session_id: str) -> str:
389
- return f"mf_vol_{session_id}"
390
-
391
-
392
- def build_tileable_dir_name(tileable_key: str) -> str:
393
- m = hashlib.md5()
394
- m.update(f"mf_dir_{tileable_key}".encode())
395
- return m.hexdigest()
336
+ return f"mf_vol_{session_id.replace('-', '_')}"
396
337
 
397
338
 
398
339
  async def wait_http_response(
@@ -436,19 +377,27 @@ async def to_thread_pool(func, *args, pool=None, **kwargs):
436
377
  return await loop.run_in_executor(pool, func_call)
437
378
 
438
379
 
439
- def create_event(loop: asyncio.AbstractEventLoop) -> asyncio.Event:
380
+ _PrimitiveType = TypeVar("_PrimitiveType")
381
+
382
+
383
+ def create_sync_primitive(
384
+ cls: Type[_PrimitiveType], loop: asyncio.AbstractEventLoop
385
+ ) -> _PrimitiveType:
440
386
  """
441
- Create an asyncio.Event in a certain event loop.
387
+ Create an asyncio sync primitive (locks, events, etc.)
388
+ in a certain event loop.
442
389
  """
443
- if sys.version_info[1] < 10 or loop is None:
444
- return asyncio.Event(loop=loop)
390
+ if sys.version_info[1] < 10:
391
+ return cls(loop=loop)
445
392
 
446
393
  # From Python3.10 the loop parameter has been removed. We should work around here.
447
- old_loop = asyncio.get_running_loop()
448
- asyncio.set_event_loop(loop)
449
- event = asyncio.Event()
450
- asyncio.set_event_loop(old_loop)
451
- return event
394
+ old_loop = asyncio.get_event_loop()
395
+ try:
396
+ asyncio.set_event_loop(loop)
397
+ primitive = cls()
398
+ finally:
399
+ asyncio.set_event_loop(old_loop)
400
+ return primitive
452
401
 
453
402
 
454
403
  class ToThreadCancelledError(asyncio.CancelledError):
@@ -1115,3 +1064,16 @@ def get_item_if_scalar(val: Any) -> Any:
1115
1064
  if isinstance(val, np.ndarray) and val.shape == ():
1116
1065
  return val.item()
1117
1066
  return val
1067
+
1068
+
1069
+ def collect_leaf_operators(root) -> List[Type]:
1070
+ result = []
1071
+
1072
+ def _collect(op_type):
1073
+ if len(op_type.__subclasses__()) == 0:
1074
+ result.append(op_type)
1075
+ for subclass in op_type.__subclasses__():
1076
+ _collect(subclass)
1077
+
1078
+ _collect(root)
1079
+ return result
@@ -1,33 +1,33 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maxframe
3
- Version: 1.0.0rc1
3
+ Version: 1.0.0rc3
4
4
  Summary: MaxFrame operator-based data analyze framework
5
- Requires-Dist: numpy <2.0.0,>=1.19.0
6
- Requires-Dist: pandas >=1.0.0
7
- Requires-Dist: pyodps >=0.11.6.1
8
- Requires-Dist: scipy >=1.0
9
- Requires-Dist: pyarrow >=1.0.0
10
- Requires-Dist: msgpack >=1.0.0
11
- Requires-Dist: traitlets >=5.0
12
- Requires-Dist: cloudpickle >=1.5.0
13
- Requires-Dist: pyyaml >=5.1
14
- Requires-Dist: tornado >=6.0
15
- Requires-Dist: defusedxml >=0.5.0
16
- Requires-Dist: tqdm >=4.1.0
17
- Requires-Dist: importlib-metadata >=1.4
18
- Requires-Dist: pickle5 ; python_version < "3.8"
5
+ Requires-Dist: numpy<2.0.0,>=1.19.0
6
+ Requires-Dist: pandas>=1.0.0
7
+ Requires-Dist: pyodps>=0.11.6.1
8
+ Requires-Dist: scipy>=1.0
9
+ Requires-Dist: pyarrow>=1.0.0
10
+ Requires-Dist: msgpack>=1.0.0
11
+ Requires-Dist: traitlets>=5.0
12
+ Requires-Dist: cloudpickle<3.0.0,>=1.5.0
13
+ Requires-Dist: pyyaml>=5.1
14
+ Requires-Dist: tornado>=6.0
15
+ Requires-Dist: defusedxml>=0.5.0
16
+ Requires-Dist: tqdm>=4.1.0
17
+ Requires-Dist: importlib-metadata>=1.4
18
+ Requires-Dist: pickle5; python_version < "3.8"
19
19
  Provides-Extra: dev
20
- Requires-Dist: black >=22.3.0 ; extra == 'dev'
21
- Requires-Dist: flake8 >=5.0.4 ; extra == 'dev'
22
- Requires-Dist: pre-commit >=2.15.0 ; extra == 'dev'
23
- Requires-Dist: graphviz >=0.20.1 ; extra == 'dev'
20
+ Requires-Dist: black>=22.3.0; extra == "dev"
21
+ Requires-Dist: flake8>=5.0.4; extra == "dev"
22
+ Requires-Dist: pre-commit>=2.15.0; extra == "dev"
23
+ Requires-Dist: graphviz>=0.20.1; extra == "dev"
24
24
  Provides-Extra: test
25
- Requires-Dist: mock ; extra == 'test'
26
- Requires-Dist: pytest >=7.3.1 ; extra == 'test'
27
- Requires-Dist: pytest-cov >=4.1.0 ; extra == 'test'
28
- Requires-Dist: pytest-asyncio >=0.21.0 ; extra == 'test'
29
- Requires-Dist: pytest-timeout >=2.1.0 ; extra == 'test'
30
- Requires-Dist: matplotlib >=2.0.0 ; extra == 'test'
25
+ Requires-Dist: mock; extra == "test"
26
+ Requires-Dist: pytest>=7.3.1; extra == "test"
27
+ Requires-Dist: pytest-cov>=4.1.0; extra == "test"
28
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
29
+ Requires-Dist: pytest-timeout>=2.1.0; extra == "test"
30
+ Requires-Dist: matplotlib>=2.0.0; extra == "test"
31
31
 
32
32
  MaxCompute MaxFrame Client
33
33
  ==========================