maxframe 1.0.0rc4__cp310-cp310-win32.whl → 1.1.1__cp310-cp310-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of maxframe might be problematic.

Files changed (88):
  1. maxframe/_utils.cp310-win32.pyd +0 -0
  2. maxframe/config/__init__.py +1 -1
  3. maxframe/config/config.py +26 -0
  4. maxframe/config/tests/test_config.py +20 -1
  5. maxframe/conftest.py +17 -4
  6. maxframe/core/graph/core.cp310-win32.pyd +0 -0
  7. maxframe/core/operator/base.py +2 -0
  8. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
  9. maxframe/dataframe/core.py +24 -2
  10. maxframe/dataframe/datasource/read_odps_query.py +65 -35
  11. maxframe/dataframe/datasource/read_odps_table.py +4 -2
  12. maxframe/dataframe/datasource/tests/test_datasource.py +59 -7
  13. maxframe/dataframe/extensions/__init__.py +5 -0
  14. maxframe/dataframe/extensions/apply_chunk.py +649 -0
  15. maxframe/dataframe/extensions/flatjson.py +131 -0
  16. maxframe/dataframe/extensions/flatmap.py +28 -40
  17. maxframe/dataframe/extensions/reshuffle.py +1 -1
  18. maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
  19. maxframe/dataframe/extensions/tests/test_extensions.py +46 -2
  20. maxframe/dataframe/groupby/__init__.py +1 -0
  21. maxframe/dataframe/groupby/aggregation.py +1 -0
  22. maxframe/dataframe/groupby/apply.py +9 -1
  23. maxframe/dataframe/groupby/core.py +1 -1
  24. maxframe/dataframe/groupby/fill.py +4 -1
  25. maxframe/dataframe/groupby/getitem.py +6 -0
  26. maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
  27. maxframe/dataframe/groupby/transform.py +8 -2
  28. maxframe/dataframe/indexing/loc.py +6 -4
  29. maxframe/dataframe/merge/__init__.py +9 -1
  30. maxframe/dataframe/merge/concat.py +41 -31
  31. maxframe/dataframe/merge/merge.py +1 -1
  32. maxframe/dataframe/merge/tests/test_merge.py +3 -1
  33. maxframe/dataframe/misc/apply.py +3 -0
  34. maxframe/dataframe/misc/drop_duplicates.py +5 -1
  35. maxframe/dataframe/misc/map.py +3 -1
  36. maxframe/dataframe/misc/tests/test_misc.py +24 -2
  37. maxframe/dataframe/misc/transform.py +22 -13
  38. maxframe/dataframe/reduction/__init__.py +3 -0
  39. maxframe/dataframe/reduction/aggregation.py +1 -0
  40. maxframe/dataframe/reduction/median.py +56 -0
  41. maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
  42. maxframe/dataframe/statistics/quantile.py +8 -2
  43. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  44. maxframe/dataframe/tests/test_utils.py +60 -0
  45. maxframe/dataframe/utils.py +110 -7
  46. maxframe/dataframe/window/expanding.py +5 -3
  47. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  48. maxframe/io/objects/tests/test_object_io.py +39 -12
  49. maxframe/io/odpsio/__init__.py +1 -1
  50. maxframe/io/odpsio/arrow.py +51 -2
  51. maxframe/io/odpsio/schema.py +23 -5
  52. maxframe/io/odpsio/tableio.py +80 -124
  53. maxframe/io/odpsio/tests/test_schema.py +40 -0
  54. maxframe/io/odpsio/tests/test_tableio.py +5 -5
  55. maxframe/io/odpsio/tests/test_volumeio.py +35 -11
  56. maxframe/io/odpsio/volumeio.py +27 -3
  57. maxframe/learn/contrib/__init__.py +3 -2
  58. maxframe/learn/contrib/llm/__init__.py +16 -0
  59. maxframe/learn/contrib/llm/core.py +54 -0
  60. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  61. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  62. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  63. maxframe/learn/contrib/llm/text.py +42 -0
  64. maxframe/lib/mmh3.cp310-win32.pyd +0 -0
  65. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  66. maxframe/opcodes.py +7 -1
  67. maxframe/serialization/core.cp310-win32.pyd +0 -0
  68. maxframe/serialization/core.pyx +13 -1
  69. maxframe/serialization/pandas.py +50 -20
  70. maxframe/serialization/serializables/core.py +70 -15
  71. maxframe/serialization/serializables/field_type.py +4 -1
  72. maxframe/serialization/serializables/tests/test_serializable.py +12 -2
  73. maxframe/serialization/tests/test_serial.py +2 -1
  74. maxframe/tensor/__init__.py +19 -7
  75. maxframe/tensor/merge/vstack.py +1 -1
  76. maxframe/tests/utils.py +16 -0
  77. maxframe/udf.py +27 -0
  78. maxframe/utils.py +42 -8
  79. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/METADATA +4 -4
  80. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/RECORD +88 -77
  81. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/WHEEL +1 -1
  82. maxframe_client/clients/framedriver.py +4 -1
  83. maxframe_client/fetcher.py +23 -8
  84. maxframe_client/session/odps.py +40 -11
  85. maxframe_client/session/task.py +6 -25
  86. maxframe_client/session/tests/test_task.py +35 -6
  87. maxframe_client/tests/test_session.py +30 -10
  88. {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/top_level.txt +0 -0
@@ -42,7 +42,7 @@ except ImportError:
42
42
  from ...lib.sparse import SparseMatrix
43
43
  from ...lib.wrapped_pickle import switch_unpickle
44
44
  from ...tests.utils import require_cudf, require_cupy
45
- from ...utils import lazy_import
45
+ from ...utils import lazy_import, no_default
46
46
  from .. import (
47
47
  PickleContainer,
48
48
  RemoteException,
@@ -90,6 +90,7 @@ class CustomNamedTuple(NamedTuple):
90
90
  pd.Timedelta(102.234154131),
91
91
  {"abc": 5.6, "def": [3.4], "gh": None, "ijk": {}},
92
92
  OrderedDict([("abcd", 5.6)]),
93
+ no_default,
93
94
  ],
94
95
  )
95
96
  @switch_unpickle
@@ -191,11 +191,6 @@ from .ufunc import ufunc
191
191
  # isort: off
192
192
  # noinspection PyUnresolvedReferences
193
193
  from numpy import (
194
- NAN,
195
- NINF,
196
- AxisError,
197
- Inf,
198
- NaN,
199
194
  e,
200
195
  errstate,
201
196
  geterr,
@@ -206,12 +201,21 @@ from numpy import (
206
201
  seterr,
207
202
  )
208
203
 
204
+ try:
205
+ from numpy.exceptions import AxisError
206
+ except ImportError:
207
+ from numpy import AxisError
208
+
209
+ NAN = nan
210
+ NINF = -inf
211
+ Inf = inf
212
+ NaN = nan
213
+
209
214
  # import numpy types
210
215
  # noinspection PyUnresolvedReferences
211
216
  from numpy import (
212
217
  bool_ as bool,
213
218
  bytes_,
214
- cfloat,
215
219
  character,
216
220
  complex64,
217
221
  complex128,
@@ -242,9 +246,17 @@ from numpy import (
242
246
  uint16,
243
247
  uint32,
244
248
  uint64,
245
- unicode_,
246
249
  unsignedinteger,
247
250
  void,
248
251
  )
249
252
 
253
+ try:
254
+ from numpy import cfloat
255
+ except ImportError:
256
+ from numpy import cdouble as cfloat
257
+ try:
258
+ from numpy import str_ as unicode_
259
+ except ImportError:
260
+ from numpy import unicode_
261
+
250
262
  del fetch, ufunc
@@ -48,7 +48,7 @@ def vstack(tup):
48
48
 
49
49
  Examples
50
50
  --------
51
- >>> import mars.tensor as mt
51
+ >>> import maxframe.tensor as mt
52
52
 
53
53
  >>> a = mt.array([1, 2, 3])
54
54
  >>> b = mt.array([2, 3, 4])
maxframe/tests/utils.py CHANGED
@@ -18,11 +18,13 @@ import hashlib
18
18
  import os
19
19
  import queue
20
20
  import socket
21
+ import time
21
22
  import types
22
23
  from threading import Thread
23
24
  from typing import Dict, List, Optional, Set, Tuple
24
25
 
25
26
  import pytest
27
+ from odps import ODPS
26
28
  from tornado import netutil
27
29
 
28
30
  from ..core import Tileable, TileableGraph
@@ -171,3 +173,17 @@ def get_test_unique_name(size=None):
171
173
  if size:
172
174
  digest = digest[:size]
173
175
  return digest + "_" + str(os.getpid())
176
+
177
+
178
+ def assert_mf_index_dtype(idx_obj, dtype):
179
+ from ..dataframe.core import IndexValue
180
+
181
+ assert isinstance(idx_obj, IndexValue.IndexBase) and idx_obj.dtype == dtype
182
+
183
+
184
+ def ensure_table_deleted(odps_entry: ODPS, table_name: str) -> None:
185
+ retry_times = 20
186
+ while odps_entry.exist_table(table_name) and retry_times > 0:
187
+ time.sleep(1)
188
+ retry_times -= 1
189
+ assert not odps_entry.exist_table(table_name)
maxframe/udf.py CHANGED
@@ -19,6 +19,7 @@ from odps.models import Resource
19
19
 
20
20
  from .serialization.serializables import (
21
21
  BoolField,
22
+ DictField,
22
23
  FieldTypes,
23
24
  FunctionField,
24
25
  ListField,
@@ -54,6 +55,10 @@ class MarkedFunction(Serializable):
54
55
  func = FunctionField("func")
55
56
  resources = ListField("resources", FieldTypes.string, default_factory=list)
56
57
  pythonpacks = ListField("pythonpacks", FieldTypes.reference, default_factory=list)
58
+ expect_engine = StringField("expect_engine", default=None)
59
+ expect_resources = DictField(
60
+ "expect_resources", FieldTypes.string, default_factory=dict
61
+ )
57
62
 
58
63
  def __init__(self, func: Optional[Callable] = None, **kw):
59
64
  super().__init__(func=func, **kw)
@@ -120,6 +125,28 @@ def with_python_requirements(
120
125
  return func_wrapper
121
126
 
122
127
 
128
+ def with_running_options(
129
+ *,
130
+ engine: Optional[str] = None,
131
+ cpu: Optional[int] = None,
132
+ memory: Optional[int] = None,
133
+ **kwargs,
134
+ ):
135
+ engine = engine.upper() if engine else None
136
+ resources = {"cpu": cpu, "memory": memory, **kwargs}
137
+
138
+ def func_wrapper(func):
139
+ if all(v is None for v in (engine, cpu, memory)):
140
+ return func
141
+ if isinstance(func, MarkedFunction):
142
+ func.expect_engine = engine
143
+ func.expect_resources = resources
144
+ return func
145
+ return MarkedFunction(func, expect_engine=engine, expect_resources=resources)
146
+
147
+ return func_wrapper
148
+
149
+
123
150
  with_resource_libraries = with_resources
124
151
 
125
152
 
maxframe/utils.py CHANGED
@@ -835,8 +835,41 @@ def parse_readable_size(value: Union[str, int, float]) -> Tuple[float, bool]:
835
835
  raise ValueError(f"Unknown limitation value: {value}")
836
836
 
837
837
 
838
- def remove_suffix(value: str, suffix: str) -> str:
839
- return value[: -len(suffix)] if value.endswith(suffix) else value
838
+ def remove_suffix(value: str, suffix: str) -> Tuple[str, bool]:
839
+ """
840
+ Remove a suffix from a given string if it exists.
841
+
842
+ Parameters
843
+ ----------
844
+ value : str
845
+ The original string.
846
+ suffix : str
847
+ The suffix to be removed.
848
+
849
+ Returns
850
+ -------
851
+ Tuple[str, bool]
852
+ A tuple containing the modified string and a boolean indicating whether the suffix was found.
853
+ """
854
+
855
+ # Check if the suffix is an empty string
856
+ if len(suffix) == 0:
857
+ # If the suffix is empty, return the original string with True
858
+ return value, True
859
+
860
+ # Check if the length of the value is less than the length of the suffix
861
+ if len(value) < len(suffix):
862
+ # If the value is shorter than the suffix, it cannot have the suffix
863
+ return value, False
864
+
865
+ # Check if the suffix matches the end of the value
866
+ match = value.endswith(suffix)
867
+
868
+ # If the suffix is found, remove it; otherwise, return the original string
869
+ if match:
870
+ return value[: -len(suffix)], match
871
+ else:
872
+ return value, match
840
873
 
841
874
 
842
875
  def find_objects(nested: Union[List, Dict], types: Union[Type, Tuple[Type]]) -> List:
@@ -1081,7 +1114,6 @@ def collect_leaf_operators(root) -> List[Type]:
1081
1114
 
1082
1115
  @contextmanager
1083
1116
  def sync_pyodps_options():
1084
- from odps.config import OptionError
1085
1117
  from odps.config import option_context as pyodps_option_context
1086
1118
 
1087
1119
  from .config import options
@@ -1089,13 +1121,15 @@ def sync_pyodps_options():
1089
1121
  with pyodps_option_context() as cfg:
1090
1122
  cfg.local_timezone = options.local_timezone
1091
1123
  if options.session.enable_schema:
1092
- try:
1093
- cfg.enable_schema = options.session.enable_schema
1094
- except (AttributeError, OptionError):
1095
- # fixme enable_schema only supported in PyODPS 0.12.0 or later
1096
- cfg.always_enable_schema = options.session.enable_schema
1124
+ cfg.enable_schema = options.session.enable_schema
1097
1125
  yield
1098
1126
 
1099
1127
 
1100
1128
  def str_to_bool(s: Optional[str]) -> Optional[bool]:
1101
1129
  return s.lower().strip() in ("true", "1") if s is not None else None
1130
+
1131
+
1132
+ def is_empty(val):
1133
+ if isinstance(val, (pd.DataFrame, pd.Series, pd.Index)):
1134
+ return val.empty
1135
+ return not bool(val)
@@ -1,21 +1,21 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: maxframe
3
- Version: 1.0.0rc4
3
+ Version: 1.1.1
4
4
  Summary: MaxFrame operator-based data analyze framework
5
5
  Requires-Dist: numpy<2.0.0,>=1.19.0
6
6
  Requires-Dist: pandas>=1.0.0
7
- Requires-Dist: pyodps>=0.11.6.1
7
+ Requires-Dist: pyodps>=0.12.0
8
8
  Requires-Dist: scipy>=1.0
9
9
  Requires-Dist: pyarrow>=1.0.0
10
10
  Requires-Dist: msgpack>=1.0.0
11
11
  Requires-Dist: traitlets>=5.0
12
12
  Requires-Dist: cloudpickle<3.0.0,>=1.5.0
13
13
  Requires-Dist: pyyaml>=5.1
14
+ Requires-Dist: pickle5; python_version < "3.8"
14
15
  Requires-Dist: tornado>=6.0
15
16
  Requires-Dist: defusedxml>=0.5.0
16
17
  Requires-Dist: tqdm>=4.1.0
17
- Requires-Dist: importlib-metadata>=1.4
18
- Requires-Dist: pickle5; python_version < "3.8"
18
+ Requires-Dist: importlib_metadata>=1.4
19
19
  Provides-Extra: dev
20
20
  Requires-Dist: black>=22.3.0; extra == "dev"
21
21
  Requires-Dist: flake8>=5.0.4; extra == "dev"