maxframe 1.0.0rc4__cp39-cp39-win32.whl → 1.1.1__cp39-cp39-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp39-win32.pyd +0 -0
- maxframe/config/__init__.py +1 -1
- maxframe/config/config.py +26 -0
- maxframe/config/tests/test_config.py +20 -1
- maxframe/conftest.py +17 -4
- maxframe/core/graph/core.cp39-win32.pyd +0 -0
- maxframe/core/operator/base.py +2 -0
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
- maxframe/dataframe/core.py +24 -2
- maxframe/dataframe/datasource/read_odps_query.py +65 -35
- maxframe/dataframe/datasource/read_odps_table.py +4 -2
- maxframe/dataframe/datasource/tests/test_datasource.py +59 -7
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/apply_chunk.py +649 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +28 -40
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +46 -2
- maxframe/dataframe/groupby/__init__.py +1 -0
- maxframe/dataframe/groupby/aggregation.py +1 -0
- maxframe/dataframe/groupby/apply.py +9 -1
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
- maxframe/dataframe/groupby/transform.py +8 -2
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +1 -1
- maxframe/dataframe/merge/tests/test_merge.py +3 -1
- maxframe/dataframe/misc/apply.py +3 -0
- maxframe/dataframe/misc/drop_duplicates.py +5 -1
- maxframe/dataframe/misc/map.py +3 -1
- maxframe/dataframe/misc/tests/test_misc.py +24 -2
- maxframe/dataframe/misc/transform.py +22 -13
- maxframe/dataframe/reduction/__init__.py +3 -0
- maxframe/dataframe/reduction/aggregation.py +1 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
- maxframe/dataframe/statistics/quantile.py +8 -2
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_utils.py +60 -0
- maxframe/dataframe/utils.py +110 -7
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +39 -12
- maxframe/io/odpsio/__init__.py +1 -1
- maxframe/io/odpsio/arrow.py +51 -2
- maxframe/io/odpsio/schema.py +23 -5
- maxframe/io/odpsio/tableio.py +80 -124
- maxframe/io/odpsio/tests/test_schema.py +40 -0
- maxframe/io/odpsio/tests/test_tableio.py +5 -5
- maxframe/io/odpsio/tests/test_volumeio.py +35 -11
- maxframe/io/odpsio/volumeio.py +27 -3
- maxframe/learn/contrib/__init__.py +3 -2
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/lib/mmh3.cp39-win32.pyd +0 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/opcodes.py +7 -1
- maxframe/serialization/core.cp39-win32.pyd +0 -0
- maxframe/serialization/core.pyx +13 -1
- maxframe/serialization/pandas.py +50 -20
- maxframe/serialization/serializables/core.py +70 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +12 -2
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/tensor/__init__.py +19 -7
- maxframe/tensor/merge/vstack.py +1 -1
- maxframe/tests/utils.py +16 -0
- maxframe/udf.py +27 -0
- maxframe/utils.py +42 -8
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/METADATA +4 -4
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/RECORD +88 -77
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +23 -8
- maxframe_client/session/odps.py +40 -11
- maxframe_client/session/task.py +6 -25
- maxframe_client/session/tests/test_task.py +35 -6
- maxframe_client/tests/test_session.py +30 -10
- {maxframe-1.0.0rc4.dist-info → maxframe-1.1.1.dist-info}/top_level.txt +0 -0
|
@@ -42,7 +42,7 @@ except ImportError:
|
|
|
42
42
|
from ...lib.sparse import SparseMatrix
|
|
43
43
|
from ...lib.wrapped_pickle import switch_unpickle
|
|
44
44
|
from ...tests.utils import require_cudf, require_cupy
|
|
45
|
-
from ...utils import lazy_import
|
|
45
|
+
from ...utils import lazy_import, no_default
|
|
46
46
|
from .. import (
|
|
47
47
|
PickleContainer,
|
|
48
48
|
RemoteException,
|
|
@@ -90,6 +90,7 @@ class CustomNamedTuple(NamedTuple):
|
|
|
90
90
|
pd.Timedelta(102.234154131),
|
|
91
91
|
{"abc": 5.6, "def": [3.4], "gh": None, "ijk": {}},
|
|
92
92
|
OrderedDict([("abcd", 5.6)]),
|
|
93
|
+
no_default,
|
|
93
94
|
],
|
|
94
95
|
)
|
|
95
96
|
@switch_unpickle
|
maxframe/tensor/__init__.py
CHANGED
|
@@ -191,11 +191,6 @@ from .ufunc import ufunc
|
|
|
191
191
|
# isort: off
|
|
192
192
|
# noinspection PyUnresolvedReferences
|
|
193
193
|
from numpy import (
|
|
194
|
-
NAN,
|
|
195
|
-
NINF,
|
|
196
|
-
AxisError,
|
|
197
|
-
Inf,
|
|
198
|
-
NaN,
|
|
199
194
|
e,
|
|
200
195
|
errstate,
|
|
201
196
|
geterr,
|
|
@@ -206,12 +201,21 @@ from numpy import (
|
|
|
206
201
|
seterr,
|
|
207
202
|
)
|
|
208
203
|
|
|
204
|
+
try:
|
|
205
|
+
from numpy.exceptions import AxisError
|
|
206
|
+
except ImportError:
|
|
207
|
+
from numpy import AxisError
|
|
208
|
+
|
|
209
|
+
NAN = nan
|
|
210
|
+
NINF = -inf
|
|
211
|
+
Inf = inf
|
|
212
|
+
NaN = nan
|
|
213
|
+
|
|
209
214
|
# import numpy types
|
|
210
215
|
# noinspection PyUnresolvedReferences
|
|
211
216
|
from numpy import (
|
|
212
217
|
bool_ as bool,
|
|
213
218
|
bytes_,
|
|
214
|
-
cfloat,
|
|
215
219
|
character,
|
|
216
220
|
complex64,
|
|
217
221
|
complex128,
|
|
@@ -242,9 +246,17 @@ from numpy import (
|
|
|
242
246
|
uint16,
|
|
243
247
|
uint32,
|
|
244
248
|
uint64,
|
|
245
|
-
unicode_,
|
|
246
249
|
unsignedinteger,
|
|
247
250
|
void,
|
|
248
251
|
)
|
|
249
252
|
|
|
253
|
+
try:
|
|
254
|
+
from numpy import cfloat
|
|
255
|
+
except ImportError:
|
|
256
|
+
from numpy import cdouble as cfloat
|
|
257
|
+
try:
|
|
258
|
+
from numpy import str_ as unicode_
|
|
259
|
+
except ImportError:
|
|
260
|
+
from numpy import unicode_
|
|
261
|
+
|
|
250
262
|
del fetch, ufunc
|
maxframe/tensor/merge/vstack.py
CHANGED
maxframe/tests/utils.py
CHANGED
|
@@ -18,11 +18,13 @@ import hashlib
|
|
|
18
18
|
import os
|
|
19
19
|
import queue
|
|
20
20
|
import socket
|
|
21
|
+
import time
|
|
21
22
|
import types
|
|
22
23
|
from threading import Thread
|
|
23
24
|
from typing import Dict, List, Optional, Set, Tuple
|
|
24
25
|
|
|
25
26
|
import pytest
|
|
27
|
+
from odps import ODPS
|
|
26
28
|
from tornado import netutil
|
|
27
29
|
|
|
28
30
|
from ..core import Tileable, TileableGraph
|
|
@@ -171,3 +173,17 @@ def get_test_unique_name(size=None):
|
|
|
171
173
|
if size:
|
|
172
174
|
digest = digest[:size]
|
|
173
175
|
return digest + "_" + str(os.getpid())
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def assert_mf_index_dtype(idx_obj, dtype):
|
|
179
|
+
from ..dataframe.core import IndexValue
|
|
180
|
+
|
|
181
|
+
assert isinstance(idx_obj, IndexValue.IndexBase) and idx_obj.dtype == dtype
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def ensure_table_deleted(odps_entry: ODPS, table_name: str) -> None:
|
|
185
|
+
retry_times = 20
|
|
186
|
+
while odps_entry.exist_table(table_name) and retry_times > 0:
|
|
187
|
+
time.sleep(1)
|
|
188
|
+
retry_times -= 1
|
|
189
|
+
assert not odps_entry.exist_table(table_name)
|
maxframe/udf.py
CHANGED
|
@@ -19,6 +19,7 @@ from odps.models import Resource
|
|
|
19
19
|
|
|
20
20
|
from .serialization.serializables import (
|
|
21
21
|
BoolField,
|
|
22
|
+
DictField,
|
|
22
23
|
FieldTypes,
|
|
23
24
|
FunctionField,
|
|
24
25
|
ListField,
|
|
@@ -54,6 +55,10 @@ class MarkedFunction(Serializable):
|
|
|
54
55
|
func = FunctionField("func")
|
|
55
56
|
resources = ListField("resources", FieldTypes.string, default_factory=list)
|
|
56
57
|
pythonpacks = ListField("pythonpacks", FieldTypes.reference, default_factory=list)
|
|
58
|
+
expect_engine = StringField("expect_engine", default=None)
|
|
59
|
+
expect_resources = DictField(
|
|
60
|
+
"expect_resources", FieldTypes.string, default_factory=dict
|
|
61
|
+
)
|
|
57
62
|
|
|
58
63
|
def __init__(self, func: Optional[Callable] = None, **kw):
|
|
59
64
|
super().__init__(func=func, **kw)
|
|
@@ -120,6 +125,28 @@ def with_python_requirements(
|
|
|
120
125
|
return func_wrapper
|
|
121
126
|
|
|
122
127
|
|
|
128
|
+
def with_running_options(
|
|
129
|
+
*,
|
|
130
|
+
engine: Optional[str] = None,
|
|
131
|
+
cpu: Optional[int] = None,
|
|
132
|
+
memory: Optional[int] = None,
|
|
133
|
+
**kwargs,
|
|
134
|
+
):
|
|
135
|
+
engine = engine.upper() if engine else None
|
|
136
|
+
resources = {"cpu": cpu, "memory": memory, **kwargs}
|
|
137
|
+
|
|
138
|
+
def func_wrapper(func):
|
|
139
|
+
if all(v is None for v in (engine, cpu, memory)):
|
|
140
|
+
return func
|
|
141
|
+
if isinstance(func, MarkedFunction):
|
|
142
|
+
func.expect_engine = engine
|
|
143
|
+
func.expect_resources = resources
|
|
144
|
+
return func
|
|
145
|
+
return MarkedFunction(func, expect_engine=engine, expect_resources=resources)
|
|
146
|
+
|
|
147
|
+
return func_wrapper
|
|
148
|
+
|
|
149
|
+
|
|
123
150
|
with_resource_libraries = with_resources
|
|
124
151
|
|
|
125
152
|
|
maxframe/utils.py
CHANGED
|
@@ -835,8 +835,41 @@ def parse_readable_size(value: Union[str, int, float]) -> Tuple[float, bool]:
|
|
|
835
835
|
raise ValueError(f"Unknown limitation value: {value}")
|
|
836
836
|
|
|
837
837
|
|
|
838
|
-
def remove_suffix(value: str, suffix: str) -> str:
|
|
839
|
-
|
|
838
|
+
def remove_suffix(value: str, suffix: str) -> Tuple[str, bool]:
|
|
839
|
+
"""
|
|
840
|
+
Remove a suffix from a given string if it exists.
|
|
841
|
+
|
|
842
|
+
Parameters
|
|
843
|
+
----------
|
|
844
|
+
value : str
|
|
845
|
+
The original string.
|
|
846
|
+
suffix : str
|
|
847
|
+
The suffix to be removed.
|
|
848
|
+
|
|
849
|
+
Returns
|
|
850
|
+
-------
|
|
851
|
+
Tuple[str, bool]
|
|
852
|
+
A tuple containing the modified string and a boolean indicating whether the suffix was found.
|
|
853
|
+
"""
|
|
854
|
+
|
|
855
|
+
# Check if the suffix is an empty string
|
|
856
|
+
if len(suffix) == 0:
|
|
857
|
+
# If the suffix is empty, return the original string with True
|
|
858
|
+
return value, True
|
|
859
|
+
|
|
860
|
+
# Check if the length of the value is less than the length of the suffix
|
|
861
|
+
if len(value) < len(suffix):
|
|
862
|
+
# If the value is shorter than the suffix, it cannot have the suffix
|
|
863
|
+
return value, False
|
|
864
|
+
|
|
865
|
+
# Check if the suffix matches the end of the value
|
|
866
|
+
match = value.endswith(suffix)
|
|
867
|
+
|
|
868
|
+
# If the suffix is found, remove it; otherwise, return the original string
|
|
869
|
+
if match:
|
|
870
|
+
return value[: -len(suffix)], match
|
|
871
|
+
else:
|
|
872
|
+
return value, match
|
|
840
873
|
|
|
841
874
|
|
|
842
875
|
def find_objects(nested: Union[List, Dict], types: Union[Type, Tuple[Type]]) -> List:
|
|
@@ -1081,7 +1114,6 @@ def collect_leaf_operators(root) -> List[Type]:
|
|
|
1081
1114
|
|
|
1082
1115
|
@contextmanager
|
|
1083
1116
|
def sync_pyodps_options():
|
|
1084
|
-
from odps.config import OptionError
|
|
1085
1117
|
from odps.config import option_context as pyodps_option_context
|
|
1086
1118
|
|
|
1087
1119
|
from .config import options
|
|
@@ -1089,13 +1121,15 @@ def sync_pyodps_options():
|
|
|
1089
1121
|
with pyodps_option_context() as cfg:
|
|
1090
1122
|
cfg.local_timezone = options.local_timezone
|
|
1091
1123
|
if options.session.enable_schema:
|
|
1092
|
-
|
|
1093
|
-
cfg.enable_schema = options.session.enable_schema
|
|
1094
|
-
except (AttributeError, OptionError):
|
|
1095
|
-
# fixme enable_schema only supported in PyODPS 0.12.0 or later
|
|
1096
|
-
cfg.always_enable_schema = options.session.enable_schema
|
|
1124
|
+
cfg.enable_schema = options.session.enable_schema
|
|
1097
1125
|
yield
|
|
1098
1126
|
|
|
1099
1127
|
|
|
1100
1128
|
def str_to_bool(s: Optional[str]) -> Optional[bool]:
|
|
1101
1129
|
return s.lower().strip() in ("true", "1") if s is not None else None
|
|
1130
|
+
|
|
1131
|
+
|
|
1132
|
+
def is_empty(val):
|
|
1133
|
+
if isinstance(val, (pd.DataFrame, pd.Series, pd.Index)):
|
|
1134
|
+
return val.empty
|
|
1135
|
+
return not bool(val)
|
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: maxframe
|
|
3
|
-
Version: 1.0.0rc4
|
|
3
|
+
Version: 1.1.1
|
|
4
4
|
Summary: MaxFrame operator-based data analyze framework
|
|
5
5
|
Requires-Dist: numpy<2.0.0,>=1.19.0
|
|
6
6
|
Requires-Dist: pandas>=1.0.0
|
|
7
|
-
Requires-Dist: pyodps>=0.
|
|
7
|
+
Requires-Dist: pyodps>=0.12.0
|
|
8
8
|
Requires-Dist: scipy>=1.0
|
|
9
9
|
Requires-Dist: pyarrow>=1.0.0
|
|
10
10
|
Requires-Dist: msgpack>=1.0.0
|
|
11
11
|
Requires-Dist: traitlets>=5.0
|
|
12
12
|
Requires-Dist: cloudpickle<3.0.0,>=1.5.0
|
|
13
13
|
Requires-Dist: pyyaml>=5.1
|
|
14
|
+
Requires-Dist: pickle5; python_version < "3.8"
|
|
14
15
|
Requires-Dist: tornado>=6.0
|
|
15
16
|
Requires-Dist: defusedxml>=0.5.0
|
|
16
17
|
Requires-Dist: tqdm>=4.1.0
|
|
17
|
-
Requires-Dist:
|
|
18
|
-
Requires-Dist: pickle5; python_version < "3.8"
|
|
18
|
+
Requires-Dist: importlib_metadata>=1.4
|
|
19
19
|
Provides-Extra: dev
|
|
20
20
|
Requires-Dist: black>=22.3.0; extra == "dev"
|
|
21
21
|
Requires-Dist: flake8>=5.0.4; extra == "dev"
|