maxframe 1.0.0rc3__cp37-cp37m-win32.whl → 1.1.0__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (112)
  1. maxframe/_utils.cp37-win32.pyd +0 -0
  2. maxframe/codegen.py +1 -0
  3. maxframe/config/config.py +16 -1
  4. maxframe/conftest.py +52 -14
  5. maxframe/core/entity/executable.py +1 -1
  6. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  7. maxframe/core/operator/base.py +2 -0
  8. maxframe/dataframe/arithmetic/docstring.py +26 -2
  9. maxframe/dataframe/arithmetic/equal.py +4 -2
  10. maxframe/dataframe/arithmetic/greater.py +4 -2
  11. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  12. maxframe/dataframe/arithmetic/less.py +2 -2
  13. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  14. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  15. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
  16. maxframe/dataframe/core.py +26 -2
  17. maxframe/dataframe/datasource/read_odps_query.py +116 -28
  18. maxframe/dataframe/datasource/read_odps_table.py +3 -1
  19. maxframe/dataframe/datasource/tests/test_datasource.py +93 -12
  20. maxframe/dataframe/datastore/to_odps.py +7 -0
  21. maxframe/dataframe/extensions/__init__.py +8 -0
  22. maxframe/dataframe/extensions/apply_chunk.py +649 -0
  23. maxframe/dataframe/extensions/flatjson.py +131 -0
  24. maxframe/dataframe/extensions/flatmap.py +314 -0
  25. maxframe/dataframe/extensions/reshuffle.py +1 -1
  26. maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
  27. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  28. maxframe/dataframe/groupby/__init__.py +1 -0
  29. maxframe/dataframe/groupby/aggregation.py +1 -0
  30. maxframe/dataframe/groupby/apply.py +9 -1
  31. maxframe/dataframe/groupby/core.py +1 -1
  32. maxframe/dataframe/groupby/fill.py +4 -1
  33. maxframe/dataframe/groupby/getitem.py +6 -0
  34. maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
  35. maxframe/dataframe/groupby/transform.py +8 -2
  36. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  37. maxframe/dataframe/indexing/loc.py +6 -4
  38. maxframe/dataframe/indexing/rename.py +11 -0
  39. maxframe/dataframe/initializer.py +11 -1
  40. maxframe/dataframe/merge/__init__.py +9 -1
  41. maxframe/dataframe/merge/concat.py +41 -31
  42. maxframe/dataframe/merge/merge.py +1 -1
  43. maxframe/dataframe/merge/tests/test_merge.py +3 -1
  44. maxframe/dataframe/misc/apply.py +3 -0
  45. maxframe/dataframe/misc/drop_duplicates.py +23 -2
  46. maxframe/dataframe/misc/map.py +3 -1
  47. maxframe/dataframe/misc/tests/test_misc.py +24 -2
  48. maxframe/dataframe/misc/transform.py +22 -13
  49. maxframe/dataframe/reduction/__init__.py +3 -0
  50. maxframe/dataframe/reduction/aggregation.py +1 -0
  51. maxframe/dataframe/reduction/median.py +56 -0
  52. maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
  53. maxframe/dataframe/statistics/quantile.py +8 -2
  54. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  55. maxframe/dataframe/tests/test_initializer.py +33 -2
  56. maxframe/dataframe/tests/test_utils.py +60 -0
  57. maxframe/dataframe/utils.py +110 -7
  58. maxframe/dataframe/window/expanding.py +5 -3
  59. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  60. maxframe/io/objects/tests/test_object_io.py +39 -12
  61. maxframe/io/odpsio/arrow.py +30 -2
  62. maxframe/io/odpsio/schema.py +28 -8
  63. maxframe/io/odpsio/tableio.py +55 -133
  64. maxframe/io/odpsio/tests/test_schema.py +40 -4
  65. maxframe/io/odpsio/tests/test_tableio.py +5 -5
  66. maxframe/io/odpsio/tests/test_volumeio.py +35 -11
  67. maxframe/io/odpsio/volumeio.py +36 -6
  68. maxframe/learn/contrib/__init__.py +3 -1
  69. maxframe/learn/contrib/graph/__init__.py +15 -0
  70. maxframe/learn/contrib/graph/connected_components.py +215 -0
  71. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  72. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  73. maxframe/learn/contrib/llm/__init__.py +16 -0
  74. maxframe/learn/contrib/llm/core.py +54 -0
  75. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  76. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  77. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  78. maxframe/learn/contrib/llm/text.py +42 -0
  79. maxframe/learn/contrib/xgboost/classifier.py +3 -3
  80. maxframe/learn/contrib/xgboost/predict.py +8 -39
  81. maxframe/learn/contrib/xgboost/train.py +4 -3
  82. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  83. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  84. maxframe/opcodes.py +10 -1
  85. maxframe/protocol.py +6 -1
  86. maxframe/serialization/core.cp37-win32.pyd +0 -0
  87. maxframe/serialization/core.pyx +13 -1
  88. maxframe/serialization/pandas.py +50 -20
  89. maxframe/serialization/serializables/core.py +24 -5
  90. maxframe/serialization/serializables/field_type.py +4 -1
  91. maxframe/serialization/serializables/tests/test_serializable.py +8 -1
  92. maxframe/serialization/tests/test_serial.py +2 -1
  93. maxframe/session.py +9 -2
  94. maxframe/tensor/__init__.py +19 -7
  95. maxframe/tensor/indexing/getitem.py +2 -0
  96. maxframe/tensor/merge/concatenate.py +23 -20
  97. maxframe/tensor/merge/vstack.py +5 -1
  98. maxframe/tensor/misc/transpose.py +1 -1
  99. maxframe/tests/utils.py +16 -0
  100. maxframe/udf.py +27 -0
  101. maxframe/utils.py +64 -14
  102. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
  103. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/RECORD +112 -96
  104. maxframe_client/clients/framedriver.py +4 -1
  105. maxframe_client/fetcher.py +28 -10
  106. maxframe_client/session/consts.py +3 -0
  107. maxframe_client/session/odps.py +104 -20
  108. maxframe_client/session/task.py +42 -26
  109. maxframe_client/session/tests/test_task.py +0 -4
  110. maxframe_client/tests/test_session.py +44 -12
  111. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/WHEEL +0 -0
  112. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
maxframe/session.py CHANGED
@@ -150,6 +150,10 @@ class AbstractSession(ABC):
150
150
  def session_id(self):
151
151
  return self._session_id
152
152
 
153
+ @property
154
+ def closed(self) -> bool:
155
+ return self._closed
156
+
153
157
  def __eq__(self, other):
154
158
  return (
155
159
  isinstance(other, AbstractSession)
@@ -1283,9 +1287,12 @@ def get_default_or_create(**kwargs):
1283
1287
  if session is None:
1284
1288
  # no session attached, try to create one
1285
1289
  warnings.warn(warning_msg)
1286
- session = new_session(
1287
- ODPS.from_global() or ODPS.from_environments(), **kwargs
1290
+ odps_entry = (
1291
+ kwargs.pop("odps_entry", None)
1292
+ or ODPS.from_global()
1293
+ or ODPS.from_environments()
1288
1294
  )
1295
+ session = new_session(odps_entry=odps_entry, **kwargs)
1289
1296
  session.as_default()
1290
1297
  if isinstance(session, IsolatedAsyncSession):
1291
1298
  session = SyncSession.from_isolated_session(session)
maxframe/tensor/__init__.py CHANGED
@@ -191,11 +191,6 @@ from .ufunc import ufunc
191
191
  # isort: off
192
192
  # noinspection PyUnresolvedReferences
193
193
  from numpy import (
194
- NAN,
195
- NINF,
196
- AxisError,
197
- Inf,
198
- NaN,
199
194
  e,
200
195
  errstate,
201
196
  geterr,
@@ -206,12 +201,21 @@ from numpy import (
206
201
  seterr,
207
202
  )
208
203
 
204
+ try:
205
+ from numpy.exceptions import AxisError
206
+ except ImportError:
207
+ from numpy import AxisError
208
+
209
+ NAN = nan
210
+ NINF = -inf
211
+ Inf = inf
212
+ NaN = nan
213
+
209
214
  # import numpy types
210
215
  # noinspection PyUnresolvedReferences
211
216
  from numpy import (
212
217
  bool_ as bool,
213
218
  bytes_,
214
- cfloat,
215
219
  character,
216
220
  complex64,
217
221
  complex128,
@@ -242,9 +246,17 @@ from numpy import (
242
246
  uint16,
243
247
  uint32,
244
248
  uint64,
245
- unicode_,
246
249
  unsignedinteger,
247
250
  void,
248
251
  )
249
252
 
253
+ try:
254
+ from numpy import cfloat
255
+ except ImportError:
256
+ from numpy import cdouble as cfloat
257
+ try:
258
+ from numpy import str_ as unicode_
259
+ except ImportError:
260
+ from numpy import unicode_
261
+
250
262
  del fetch, ufunc
maxframe/tensor/indexing/getitem.py CHANGED
@@ -130,6 +130,8 @@ def _calc_order(a, index):
130
130
  continue
131
131
  elif isinstance(ind, slice):
132
132
  shape = a.shape[in_axis]
133
+ if shape is np.nan:
134
+ return TensorOrder.C_ORDER
133
135
  slc = ind.indices(shape)
134
136
  if slc[0] == 0 and slc[1] == shape and slc[2] == 1:
135
137
  continue
maxframe/tensor/merge/concatenate.py CHANGED
@@ -26,27 +26,9 @@ class TensorConcatenate(TensorOperator, TensorOperatorMixin):
26
26
  axis = Int32Field("axis", default=0)
27
27
 
28
28
  def __call__(self, tensors):
29
- if len(set(t.ndim for t in tensors)) != 1:
30
- raise ValueError(
31
- "all the input tensors must have same number of dimensions"
32
- )
33
-
34
29
  axis = self.axis
35
- shapes = [t.shape[:axis] + t.shape[axis + 1 :] for t in tensors]
36
- if len(set(shapes)) != 1:
37
- raise ValueError(
38
- "all the input tensor dimensions "
39
- "except for the concatenation axis must match exactly"
40
- )
41
-
42
- shape = [
43
- 0 if i == axis else tensors[0].shape[i] for i in range(tensors[0].ndim)
44
- ]
30
+ shape = _calc_concatenate_shape(tensors, axis)
45
31
  shape[axis] = sum(t.shape[axis] for t in tensors)
46
-
47
- if any(np.isnan(s) for i, s in enumerate(shape) if i != axis):
48
- raise ValueError("cannot concatenate tensor with unknown shape")
49
-
50
32
  return self.new_tensor(tensors, shape=tuple(shape))
51
33
 
52
34
 
@@ -90,9 +72,30 @@ def concatenate(tensors, axis=0):
90
72
  if axis is None:
91
73
  axis = 0
92
74
  tensors = [astensor(t) for t in tensors]
93
-
94
75
  axis = validate_axis(tensors[0].ndim, axis)
76
+
77
+ if len(set(t.ndim for t in tensors)) != 1:
78
+ raise ValueError("all the input tensors must have same number of dimensions")
79
+
80
+ shapes = [t.shape[:axis] + t.shape[axis + 1 :] for t in tensors]
81
+ if len(set(shapes)) != 1:
82
+ raise ValueError(
83
+ "all the input tensor dimensions "
84
+ "except for the concatenation axis must match exactly"
85
+ )
86
+ shape = _calc_concatenate_shape(tensors, axis)
87
+ if any(np.isnan(s) for i, s in enumerate(shape) if i != axis):
88
+ raise ValueError("cannot concatenate tensor with unknown shape")
89
+
90
+ return _concatenate(tensors, axis)
91
+
92
+
93
+ def _concatenate(tensors, axis=0):
95
94
  dtype = np.result_type(*(t.dtype for t in tensors))
96
95
 
97
96
  op = TensorConcatenate(axis=axis, dtype=dtype)
98
97
  return op(tensors)
98
+
99
+
100
+ def _calc_concatenate_shape(tensors, axis):
101
+ return [0 if i == axis else tensors[0].shape[i] for i in range(tensors[0].ndim)]
maxframe/tensor/merge/vstack.py CHANGED
@@ -14,7 +14,7 @@
14
14
 
15
15
 
16
16
  from ..misc import atleast_2d
17
- from .concatenate import concatenate
17
+ from .concatenate import _concatenate, concatenate
18
18
 
19
19
 
20
20
  def vstack(tup):
@@ -68,3 +68,7 @@ def vstack(tup):
68
68
 
69
69
  """
70
70
  return concatenate([atleast_2d(t) for t in tup], axis=0)
71
+
72
+
73
+ def _vstack(tup):
74
+ return _concatenate([atleast_2d(t) for t in tup], axis=0)
maxframe/tensor/misc/transpose.py CHANGED
@@ -125,5 +125,5 @@ def transpose(a, axes=None):
125
125
  axes = list(range(a.ndim))[::-1]
126
126
  else:
127
127
  axes = list(axes)
128
- op = TensorTranspose(axes)
128
+ op = TensorTranspose(axes, dtype=a.dtype)
129
129
  return op(a)
maxframe/tests/utils.py CHANGED
@@ -18,11 +18,13 @@ import hashlib
18
18
  import os
19
19
  import queue
20
20
  import socket
21
+ import time
21
22
  import types
22
23
  from threading import Thread
23
24
  from typing import Dict, List, Optional, Set, Tuple
24
25
 
25
26
  import pytest
27
+ from odps import ODPS
26
28
  from tornado import netutil
27
29
 
28
30
  from ..core import Tileable, TileableGraph
@@ -171,3 +173,17 @@ def get_test_unique_name(size=None):
171
173
  if size:
172
174
  digest = digest[:size]
173
175
  return digest + "_" + str(os.getpid())
176
+
177
+
178
+ def assert_mf_index_dtype(idx_obj, dtype):
179
+ from ..dataframe.core import IndexValue
180
+
181
+ assert isinstance(idx_obj, IndexValue.IndexBase) and idx_obj.dtype == dtype
182
+
183
+
184
+ def ensure_table_deleted(odps_entry: ODPS, table_name: str) -> None:
185
+ retry_times = 20
186
+ while odps_entry.exist_table(table_name) and retry_times > 0:
187
+ time.sleep(1)
188
+ retry_times -= 1
189
+ assert not odps_entry.exist_table(table_name)
maxframe/udf.py CHANGED
@@ -19,6 +19,7 @@ from odps.models import Resource
19
19
 
20
20
  from .serialization.serializables import (
21
21
  BoolField,
22
+ DictField,
22
23
  FieldTypes,
23
24
  FunctionField,
24
25
  ListField,
@@ -54,6 +55,10 @@ class MarkedFunction(Serializable):
54
55
  func = FunctionField("func")
55
56
  resources = ListField("resources", FieldTypes.string, default_factory=list)
56
57
  pythonpacks = ListField("pythonpacks", FieldTypes.reference, default_factory=list)
58
+ expect_engine = StringField("expect_engine", default=None)
59
+ expect_resources = DictField(
60
+ "expect_resources", FieldTypes.string, default_factory=dict
61
+ )
57
62
 
58
63
  def __init__(self, func: Optional[Callable] = None, **kw):
59
64
  super().__init__(func=func, **kw)
@@ -120,6 +125,28 @@ def with_python_requirements(
120
125
  return func_wrapper
121
126
 
122
127
 
128
+ def with_running_options(
129
+ *,
130
+ engine: Optional[str] = None,
131
+ cpu: Optional[int] = None,
132
+ memory: Optional[int] = None,
133
+ **kwargs,
134
+ ):
135
+ engine = engine.upper() if engine else None
136
+ resources = {"cpu": cpu, "memory": memory, **kwargs}
137
+
138
+ def func_wrapper(func):
139
+ if all(v is None for v in (engine, cpu, memory)):
140
+ return func
141
+ if isinstance(func, MarkedFunction):
142
+ func.expect_engine = engine
143
+ func.expect_resources = resources
144
+ return func
145
+ return MarkedFunction(func, expect_engine=engine, expect_resources=resources)
146
+
147
+ return func_wrapper
148
+
149
+
123
150
  with_resource_libraries = with_resources
124
151
 
125
152
 
maxframe/utils.py CHANGED
@@ -370,13 +370,6 @@ def format_timeout_params(timeout: TimeoutType) -> str:
370
370
  return f"?wait=1&timeout={timeout}"
371
371
 
372
372
 
373
- async def to_thread_pool(func, *args, pool=None, **kwargs):
374
- loop = asyncio.events.get_running_loop()
375
- ctx = contextvars.copy_context()
376
- func_call = functools.partial(ctx.run, func, *args, **kwargs)
377
- return await loop.run_in_executor(pool, func_call)
378
-
379
-
380
373
  _PrimitiveType = TypeVar("_PrimitiveType")
381
374
 
382
375
 
@@ -438,15 +431,22 @@ class ToThreadMixin:
438
431
  thread_name_prefix=f"{type(self).__name__}Pool-{self._counter()}",
439
432
  )
440
433
 
441
- task = asyncio.create_task(
442
- to_thread_pool(func, *args, **kwargs, pool=self._pool)
443
- )
434
+ loop = asyncio.events.get_running_loop()
435
+ ctx = contextvars.copy_context()
436
+ func_call = functools.partial(ctx.run, func, *args, **kwargs)
437
+ fut = loop.run_in_executor(self._pool, func_call)
438
+
444
439
  try:
445
- return await asyncio.wait_for(asyncio.shield(task), timeout)
440
+ coro = fut
441
+ if wait_on_cancel:
442
+ coro = asyncio.shield(coro)
443
+ if timeout is not None:
444
+ coro = asyncio.wait_for(coro, timeout)
445
+ return await coro
446
446
  except (asyncio.CancelledError, asyncio.TimeoutError) as ex:
447
447
  if not wait_on_cancel:
448
448
  raise
449
- result = await task
449
+ result = await fut
450
450
  raise ToThreadCancelledError(*ex.args, result=result)
451
451
 
452
452
  def ensure_async_call(
@@ -835,8 +835,41 @@ def parse_readable_size(value: Union[str, int, float]) -> Tuple[float, bool]:
835
835
  raise ValueError(f"Unknown limitation value: {value}")
836
836
 
837
837
 
838
- def remove_suffix(value: str, suffix: str) -> str:
839
- return value[: -len(suffix)] if value.endswith(suffix) else value
838
+ def remove_suffix(value: str, suffix: str) -> Tuple[str, bool]:
839
+ """
840
+ Remove a suffix from a given string if it exists.
841
+
842
+ Parameters
843
+ ----------
844
+ value : str
845
+ The original string.
846
+ suffix : str
847
+ The suffix to be removed.
848
+
849
+ Returns
850
+ -------
851
+ Tuple[str, bool]
852
+ A tuple containing the modified string and a boolean indicating whether the suffix was found.
853
+ """
854
+
855
+ # Check if the suffix is an empty string
856
+ if len(suffix) == 0:
857
+ # If the suffix is empty, return the original string with True
858
+ return value, True
859
+
860
+ # Check if the length of the value is less than the length of the suffix
861
+ if len(value) < len(suffix):
862
+ # If the value is shorter than the suffix, it cannot have the suffix
863
+ return value, False
864
+
865
+ # Check if the suffix matches the end of the value
866
+ match = value.endswith(suffix)
867
+
868
+ # If the suffix is found, remove it; otherwise, return the original string
869
+ if match:
870
+ return value[: -len(suffix)], match
871
+ else:
872
+ return value, match
840
873
 
841
874
 
842
875
  def find_objects(nested: Union[List, Dict], types: Union[Type, Tuple[Type]]) -> List:
@@ -1077,3 +1110,20 @@ def collect_leaf_operators(root) -> List[Type]:
1077
1110
 
1078
1111
  _collect(root)
1079
1112
  return result
1113
+
1114
+
1115
+ @contextmanager
1116
+ def sync_pyodps_options():
1117
+ from odps.config import option_context as pyodps_option_context
1118
+
1119
+ from .config import options
1120
+
1121
+ with pyodps_option_context() as cfg:
1122
+ cfg.local_timezone = options.local_timezone
1123
+ if options.session.enable_schema:
1124
+ cfg.enable_schema = options.session.enable_schema
1125
+ yield
1126
+
1127
+
1128
+ def str_to_bool(s: Optional[str]) -> Optional[bool]:
1129
+ return s.lower().strip() in ("true", "1") if s is not None else None
{maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/METADATA CHANGED
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maxframe
3
- Version: 1.0.0rc3
3
+ Version: 1.1.0
4
4
  Summary: MaxFrame operator-based data analyze framework
5
5
  Requires-Dist: numpy <2.0.0,>=1.19.0
6
6
  Requires-Dist: pandas >=1.0.0
7
- Requires-Dist: pyodps >=0.11.6.1
7
+ Requires-Dist: pyodps >=0.12.0
8
8
  Requires-Dist: scipy >=1.0
9
9
  Requires-Dist: pyarrow >=1.0.0
10
10
  Requires-Dist: msgpack >=1.0.0