maxframe 2.2.0__cp312-cp312-macosx_10_9_universal2.whl → 2.3.0rc1__cp312-cp312-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-312-darwin.so +0 -0
- maxframe/codegen/core.py +3 -2
- maxframe/codegen/spe/dataframe/merge.py +4 -0
- maxframe/codegen/spe/dataframe/misc.py +2 -0
- maxframe/codegen/spe/dataframe/reduction.py +18 -0
- maxframe/codegen/spe/dataframe/sort.py +9 -1
- maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
- maxframe/codegen/spe/dataframe/tseries.py +9 -0
- maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
- maxframe/codegen/spe/tensor/datasource.py +1 -0
- maxframe/config/config.py +3 -0
- maxframe/conftest.py +10 -0
- maxframe/core/base.py +2 -1
- maxframe/core/entity/tileables.py +2 -0
- maxframe/core/graph/entity.py +7 -1
- maxframe/core/mode.py +6 -1
- maxframe/dataframe/__init__.py +2 -2
- maxframe/dataframe/arithmetic/__init__.py +4 -0
- maxframe/dataframe/arithmetic/maximum.py +33 -0
- maxframe/dataframe/arithmetic/minimum.py +33 -0
- maxframe/dataframe/core.py +98 -106
- maxframe/dataframe/datasource/core.py +6 -0
- maxframe/dataframe/datasource/direct.py +57 -0
- maxframe/dataframe/datasource/read_csv.py +19 -11
- maxframe/dataframe/datasource/read_odps_query.py +29 -6
- maxframe/dataframe/datasource/read_odps_table.py +32 -10
- maxframe/dataframe/datasource/read_parquet.py +38 -39
- maxframe/dataframe/datastore/__init__.py +6 -0
- maxframe/dataframe/datastore/direct.py +268 -0
- maxframe/dataframe/datastore/to_odps.py +6 -0
- maxframe/dataframe/extensions/flatjson.py +2 -1
- maxframe/dataframe/groupby/__init__.py +5 -1
- maxframe/dataframe/groupby/aggregation.py +10 -6
- maxframe/dataframe/groupby/apply_chunk.py +1 -3
- maxframe/dataframe/groupby/core.py +20 -4
- maxframe/dataframe/indexing/__init__.py +2 -1
- maxframe/dataframe/indexing/insert.py +45 -17
- maxframe/dataframe/merge/__init__.py +3 -0
- maxframe/dataframe/merge/combine.py +244 -0
- maxframe/dataframe/misc/__init__.py +14 -3
- maxframe/dataframe/misc/check_unique.py +41 -10
- maxframe/dataframe/misc/drop.py +31 -0
- maxframe/dataframe/misc/infer_dtypes.py +251 -0
- maxframe/dataframe/misc/map.py +31 -18
- maxframe/dataframe/misc/repeat.py +159 -0
- maxframe/dataframe/misc/tests/test_misc.py +35 -1
- maxframe/dataframe/missing/checkna.py +3 -2
- maxframe/dataframe/reduction/__init__.py +10 -5
- maxframe/dataframe/reduction/aggregation.py +6 -6
- maxframe/dataframe/reduction/argmax.py +7 -4
- maxframe/dataframe/reduction/argmin.py +7 -4
- maxframe/dataframe/reduction/core.py +18 -9
- maxframe/dataframe/reduction/mode.py +144 -0
- maxframe/dataframe/reduction/nunique.py +10 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
- maxframe/dataframe/sort/__init__.py +9 -2
- maxframe/dataframe/sort/argsort.py +7 -1
- maxframe/dataframe/sort/core.py +1 -1
- maxframe/dataframe/sort/rank.py +147 -0
- maxframe/dataframe/tseries/__init__.py +19 -0
- maxframe/dataframe/tseries/at_time.py +61 -0
- maxframe/dataframe/tseries/between_time.py +122 -0
- maxframe/dataframe/utils.py +30 -26
- maxframe/learn/contrib/llm/core.py +16 -7
- maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/config.py +221 -0
- maxframe/learn/contrib/llm/deploy/core.py +247 -0
- maxframe/learn/contrib/llm/deploy/framework.py +35 -0
- maxframe/learn/contrib/llm/deploy/loader.py +360 -0
- maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
- maxframe/learn/contrib/llm/models/__init__.py +1 -0
- maxframe/learn/contrib/llm/models/dashscope.py +12 -6
- maxframe/learn/contrib/llm/models/managed.py +76 -11
- maxframe/learn/contrib/llm/models/openai.py +72 -0
- maxframe/learn/contrib/llm/tests/__init__.py +13 -0
- maxframe/learn/contrib/llm/tests/test_core.py +34 -0
- maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
- maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
- maxframe/learn/contrib/llm/text.py +348 -42
- maxframe/learn/contrib/models.py +4 -1
- maxframe/learn/contrib/xgboost/classifier.py +2 -0
- maxframe/learn/contrib/xgboost/core.py +31 -7
- maxframe/learn/contrib/xgboost/predict.py +4 -2
- maxframe/learn/contrib/xgboost/regressor.py +5 -0
- maxframe/learn/contrib/xgboost/train.py +2 -0
- maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
- maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
- maxframe/learn/utils/__init__.py +1 -0
- maxframe/learn/utils/extmath.py +42 -9
- maxframe/learn/utils/odpsio.py +80 -11
- maxframe/lib/filesystem/_oss_lib/common.py +2 -0
- maxframe/lib/mmh3.cpython-312-darwin.so +0 -0
- maxframe/opcodes.py +9 -1
- maxframe/remote/core.py +4 -0
- maxframe/serialization/core.cpython-312-darwin.so +0 -0
- maxframe/serialization/tests/test_serial.py +2 -2
- maxframe/tensor/arithmetic/__init__.py +1 -1
- maxframe/tensor/arithmetic/core.py +2 -2
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +0 -9
- maxframe/tensor/core.py +3 -0
- maxframe/tensor/misc/copyto.py +1 -1
- maxframe/tests/test_udf.py +61 -0
- maxframe/tests/test_utils.py +8 -5
- maxframe/udf.py +103 -7
- maxframe/utils.py +61 -8
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +1 -2
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +112 -89
- maxframe_client/session/task.py +8 -1
- maxframe_client/tests/test_session.py +24 -0
- maxframe/dataframe/arrays.py +0 -864
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
- {maxframe-2.2.0.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Copyright 1999-2025 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import textwrap
|
|
16
|
+
|
|
17
|
+
from odps import ODPS
|
|
18
|
+
from odps.errors import NoSuchObject
|
|
19
|
+
|
|
20
|
+
from maxframe.tests.utils import tn
|
|
21
|
+
from maxframe.udf import ODPSFunction
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_odps_function():
    """Check that ``ODPSFunction.wrap`` wraps a function object created in ODPS.

    The test creates a Python resource and a function referencing it in the
    project returned by ``ODPS.from_environments()``, wraps the function object
    and verifies the wrapper's type, ``__name__`` and ``full_function_name``.
    The resource and the function are removed both before and after the test.
    """
    # The literal opens with a line continuation so that every line of the UDF
    # source shares the same leading indent; otherwise the first line sits at
    # column 0 and ``textwrap.dedent`` has no common margin to strip.
    func_body = """\
        from odps.udf import annotate
        @annotate("bigint->bigint")
        class MyMul(object):
            def evaluate(self, arg0):
                return arg0 * 2 if arg0 is not None else None"""
    odps_entry = ODPS.from_environments()
    res_name = tn("test_res")
    func_name = tn("test_odps_func")

    def _cleanup():
        # Best-effort removal: objects that do not exist are simply ignored.
        try:
            odps_entry.delete_resource(res_name + ".py")
        except NoSuchObject:
            pass
        try:
            odps_entry.delete_function(func_name)
        except NoSuchObject:
            pass

    # Drop leftovers of a previous (possibly aborted) run before creating anew.
    _cleanup()

    try:
        test_res = odps_entry.create_resource(
            res_name + ".py", "py", fileobj=textwrap.dedent(func_body)
        )
        test_odps_func_obj = odps_entry.create_function(
            func_name, class_type=f"{res_name}.MyMul", resources=[test_res]
        )
        func = ODPSFunction.wrap(test_odps_func_obj)
        assert isinstance(func, ODPSFunction)
        assert func.__name__ == func_name
        # The full name may or may not carry a schema part.
        assert func.full_function_name in (
            f"{odps_entry.project}:{func_name}",
            f"{odps_entry.project}:default:{func_name}",
        )
    finally:
        _cleanup()
|
maxframe/tests/test_utils.py
CHANGED
|
@@ -31,6 +31,7 @@ import pyarrow as pa
|
|
|
31
31
|
import pytest
|
|
32
32
|
|
|
33
33
|
from .. import utils
|
|
34
|
+
from ..lib.dtypes_extension import ArrowDtype
|
|
34
35
|
from ..serialization import PickleContainer
|
|
35
36
|
from ..utils import parse_size_to_megabytes, validate_and_adjust_resource_ratio
|
|
36
37
|
|
|
@@ -298,11 +299,11 @@ def test_estimate_pandas_size():
|
|
|
298
299
|
s1 = pd.Series(np.random.rand(1000))
|
|
299
300
|
assert utils.estimate_pandas_size(s1) == sys.getsizeof(s1)
|
|
300
301
|
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
302
|
+
if hasattr(pd, "ArrowDtype"):
|
|
303
|
+
arrow_array = pa.array(np.random.choice(["abcd", "def", "gh"], size=(1000,)))
|
|
304
|
+
array = pd.array(arrow_array, dtype=ArrowDtype(arrow_array.type))
|
|
305
|
+
s2 = pd.Series(array)
|
|
306
|
+
assert utils.estimate_pandas_size(s2) == sys.getsizeof(s2)
|
|
306
307
|
|
|
307
308
|
s3 = pd.Series(np.random.choice(["abcd", "def", "gh"], size=(1000,)))
|
|
308
309
|
assert (
|
|
@@ -366,6 +367,8 @@ def test_arrow_type_from_string():
|
|
|
366
367
|
_assert_arrow_type_convert(pa.decimal128(10, 2))
|
|
367
368
|
_assert_arrow_type_convert(pa.list_(pa.int64()))
|
|
368
369
|
_assert_arrow_type_convert(pa.map_(pa.string(), pa.int64()))
|
|
370
|
+
_assert_arrow_type_convert(pa.date32())
|
|
371
|
+
_assert_arrow_type_convert(pa.date64())
|
|
369
372
|
_assert_arrow_type_convert(
|
|
370
373
|
pa.struct([("key", pa.string()), ("value", pa.list_(pa.int64()))])
|
|
371
374
|
)
|
maxframe/udf.py
CHANGED
|
@@ -13,13 +13,18 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import shlex
|
|
16
|
+
import sys
|
|
16
17
|
from typing import Callable, List, Optional, Union
|
|
17
18
|
|
|
18
|
-
|
|
19
|
+
import numpy as np
|
|
20
|
+
from odps.models import Function as ODPSFunctionObj
|
|
21
|
+
from odps.models import Resource as ODPSResourceObj
|
|
19
22
|
|
|
20
23
|
from .config.validators import is_positive_integer
|
|
24
|
+
from .core.mode import is_mock_mode
|
|
21
25
|
from .serialization import load_member
|
|
22
26
|
from .serialization.serializables import (
|
|
27
|
+
AnyField,
|
|
23
28
|
BoolField,
|
|
24
29
|
DictField,
|
|
25
30
|
FieldTypes,
|
|
@@ -28,7 +33,8 @@ from .serialization.serializables import (
|
|
|
28
33
|
Serializable,
|
|
29
34
|
StringField,
|
|
30
35
|
)
|
|
31
|
-
from .
|
|
36
|
+
from .typing_ import PandasDType
|
|
37
|
+
from .utils import extract_class_name, make_dtype, tokenize
|
|
32
38
|
|
|
33
39
|
|
|
34
40
|
class PythonPackOptions(Serializable):
|
|
@@ -122,8 +128,100 @@ class MarkedFunction(Serializable):
|
|
|
122
128
|
return f"<MarkedFunction {self.func!r}>"
|
|
123
129
|
|
|
124
130
|
|
|
125
|
-
|
|
126
|
-
|
|
131
|
+
class ODPSFunction(Serializable):
    """Serializable reference to a function identified by its full ODPS name.

    The function is addressed by ``full_function_name``, which has the form
    ``project[:schema]:name`` when built from an ODPS function object.
    Calling an instance only works when the call originates from an
    aggregation / reduction module of ``maxframe.dataframe``; any other call
    raises ``NotImplementedError``.
    """

    # Cache slot for the caller kind detected by ``_detect_caller_type``.
    __slots__ = ("_caller_type",)

    full_function_name = StringField("full_function_name")
    expect_engine = StringField("expect_engine", default=None)
    expect_resources = DictField(
        "expect_resources", FieldTypes.string, default_factory=dict
    )
    result_dtype = AnyField("result_dtype", default=None)

    def __init__(
        self,
        func,
        expect_engine: Optional[str] = None,
        expect_resources: Optional[dict] = None,
        dtype: Optional[PandasDType] = None,
        **kw,
    ):
        """Build a reference from a function name or an ODPS function object.

        Parameters
        ----------
        func
            Either the full function name as a string, or an ODPS function
            object from which ``project[:schema]:name`` is derived. Any other
            value leaves ``full_function_name`` to be supplied through ``kw``.
        expect_engine
            Forwarded to the ``expect_engine`` field.
        expect_resources
            Forwarded to the ``expect_resources`` field.
        dtype
            Optional result dtype; normalized with ``make_dtype`` and stored in
            ``result_dtype``.
        **kw
            Extra field values forwarded to the base class constructor.
        """
        full_function_name = None
        if isinstance(func, str):
            full_function_name = func
        elif isinstance(func, ODPSFunctionObj):
            func_parts = [func.project.name]
            # The schema part is only included when the function has one.
            if func.schema:
                func_parts.append(func.schema.name)
            func_parts.append(func.name)
            full_function_name = ":".join(func_parts)
        if full_function_name:
            kw["full_function_name"] = full_function_name

        if dtype is not None:
            kw["result_dtype"] = make_dtype(dtype)
        super().__init__(
            expect_engine=expect_engine, expect_resources=expect_resources, **kw
        )

    @property
    def __name__(self):
        """Return the last ``:``-separated component of the full function name."""
        return self.full_function_name.rsplit(":", 1)[-1]

    def _detect_caller_type(self) -> Optional[str]:
        """Return ``"map"``, ``"agg"`` or ``None`` depending on the calling module.

        The call stack is walked outwards looking for a frame whose module name
        starts with ``maxframe.dataframe.``. A detected kind is cached on the
        instance; ``None`` is not cached, so detection is retried next time.
        """
        if hasattr(self, "_caller_type"):
            return self._caller_type

        frame = sys._getframe(1)
        is_set = False
        while frame.f_back:
            f_mod = frame.f_globals.get("__name__")
            if f_mod and f_mod.startswith("maxframe.dataframe."):
                if f_mod.endswith(".map"):
                    self._caller_type, is_set = "map", True
                elif f_mod.endswith(".aggregation") or ".reduction." in f_mod:
                    self._caller_type, is_set = "agg", True
                if is_set:
                    return self._caller_type
            frame = frame.f_back
        return None

    def __call__(self, obj, *args, **kwargs):
        """Dispatch the call for aggregation callers; reject every other caller.

        Raises
        ------
        NotImplementedError
            When the detected caller kind is not ``"agg"``.
        """
        caller_type = self._detect_caller_type()
        if caller_type == "agg":
            return self._call_aggregate(obj, *args, **kwargs)
        raise NotImplementedError("Need to be referenced inside apply or map functions")

    def _call_aggregate(self, obj, *args, **kwargs):
        """Handle a call issued from an aggregation / reduction module.

        MaxFrame DataFrame / Series objects are turned into a custom reduction
        result. In mock mode the first element of ``obj`` (``obj.iloc[0]``) is
        returned as a stand-in, cast to ``result_dtype`` when one is configured.

        Raises
        ------
        NotImplementedError
            When ``obj`` is not a MaxFrame object and mock mode is off.
        """
        # Imported lazily inside the method rather than at module level.
        from .dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
        from .dataframe.reduction.custom_reduction import build_custom_reduction_result

        if isinstance(obj, (DATAFRAME_TYPE, SERIES_TYPE)):
            return build_custom_reduction_result(obj, self)
        if is_mock_mode():
            ret = obj.iloc[0]
            # Test against None explicitly instead of relying on the
            # truthiness of a dtype object.
            if self.result_dtype is not None:
                if hasattr(ret, "astype"):
                    ret = ret.astype(self.result_dtype)
                else:  # pragma: no cover
                    ret = np.array(ret).astype(self.result_dtype).item()
            return ret
        raise NotImplementedError("Need to be referenced inside apply or map functions")

    def __repr__(self):
        return f"<ODPSStoredFunction {self.full_function_name}>"

    @classmethod
    def wrap(cls, func):
        """Wrap ODPS function objects; return any other value unchanged.

        ``cls`` is used for construction so that subclasses calling ``wrap``
        receive instances of themselves.
        """
        if isinstance(func, ODPSFunctionObj):
            return cls(func)
        return func
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def with_resources(
|
|
222
|
+
*resources: Union[str, ODPSResourceObj], use_wrapper_class: bool = True
|
|
223
|
+
):
|
|
224
|
+
def res_to_str(res: Union[str, ODPSResourceObj]) -> str:
|
|
127
225
|
if isinstance(res, str):
|
|
128
226
|
return res
|
|
129
227
|
res_parts = [res.project.name]
|
|
@@ -250,9 +348,7 @@ def with_running_options(
|
|
|
250
348
|
with_resource_libraries = with_resources
|
|
251
349
|
|
|
252
350
|
|
|
253
|
-
def get_udf_resources(
|
|
254
|
-
func: Callable,
|
|
255
|
-
) -> List[Union[Resource, str]]:
|
|
351
|
+
def get_udf_resources(func: Callable) -> List[Union[ODPSResourceObj, str]]:
    """Return the ``resources`` attribute of ``func``, or an empty list.

    A missing attribute and a falsy value (``None``, empty list) both yield a
    fresh empty list.
    """
    attached = getattr(func, "resources", None)
    if attached:
        return attached
    return []
|
|
257
353
|
|
|
258
354
|
|
maxframe/utils.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import asyncio.events
|
|
16
16
|
import concurrent.futures
|
|
17
|
+
import contextlib
|
|
17
18
|
import contextvars
|
|
18
19
|
import copy
|
|
19
20
|
import dataclasses
|
|
@@ -80,6 +81,7 @@ from ._utils import ( # noqa: F401 # pylint: disable=unused-import
|
|
|
80
81
|
tokenize,
|
|
81
82
|
tokenize_int,
|
|
82
83
|
)
|
|
84
|
+
from .lib.dtypes_extension import ArrowDtype
|
|
83
85
|
from .lib.version import parse as parse_version
|
|
84
86
|
from .typing_ import TileableType, TimeoutType
|
|
85
87
|
|
|
@@ -204,13 +206,28 @@ def on_serialize_nsplits(value: Tuple[Tuple[int]]):
|
|
|
204
206
|
return tuple(new_nsplits)
|
|
205
207
|
|
|
206
208
|
|
|
207
|
-
def has_unknown_shape(
|
|
209
|
+
def has_unknown_shape(
    *tiled_tileables: TileableType, axis: Union[None, int, List[int]] = None
) -> bool:
    """Tell whether any tileable has a null entry in its shape or nsplits.

    Parameters
    ----------
    *tiled_tileables
        Objects to inspect. Objects whose ``shape`` attribute is missing or
        ``None`` are skipped.
    axis
        ``None`` checks every axis; an int or a list of ints restricts the
        check to those axes of ``shape`` and ``nsplits``.

    Returns
    -------
    bool
        ``True`` as soon as a null (per ``pd.isnull``) size is found,
        ``False`` otherwise.
    """
    # Normalize a single axis into a one-element list.
    axes = [axis] if isinstance(axis, int) else axis

    def _has_null(values):
        return any(pd.isnull(v) for v in values)

    for tileable in tiled_tileables:
        if getattr(tileable, "shape", None) is None:
            continue

        if axes is None:
            dims = tileable.shape
        else:
            dims = (tileable.shape[i] for i in axes)
        if _has_null(dims):
            return True

        # nsplits is only consulted once the shape itself is fully known.
        if axes is None:
            splits = tileable.nsplits
        else:
            splits = (tileable.nsplits[i] for i in axes)
        if _has_null(itertools.chain(*splits)):
            return True
    return False
|
|
216
233
|
|
|
@@ -281,7 +298,10 @@ def make_dtype(dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]):
|
|
|
281
298
|
elif dtype is pd.Timedelta or dtype is datetime.timedelta:
|
|
282
299
|
return np.dtype("timedelta64[ns]")
|
|
283
300
|
else:
|
|
284
|
-
|
|
301
|
+
try:
|
|
302
|
+
return pd.api.types.pandas_dtype(dtype)
|
|
303
|
+
except TypeError:
|
|
304
|
+
return np.dtype("O")
|
|
285
305
|
|
|
286
306
|
|
|
287
307
|
def make_dtypes(
|
|
@@ -448,7 +468,10 @@ def create_sync_primitive(
|
|
|
448
468
|
return cls(loop=loop)
|
|
449
469
|
|
|
450
470
|
# From Python3.10 the loop parameter has been removed. We should work around here.
|
|
451
|
-
|
|
471
|
+
try:
|
|
472
|
+
old_loop = asyncio.get_event_loop()
|
|
473
|
+
except RuntimeError:
|
|
474
|
+
old_loop = None
|
|
452
475
|
try:
|
|
453
476
|
asyncio.set_event_loop(loop)
|
|
454
477
|
primitive = cls()
|
|
@@ -599,8 +622,6 @@ def estimate_pandas_size(
|
|
|
599
622
|
# MultiIndex's sample size can't be used to estimate
|
|
600
623
|
return sys.getsizeof(pd_obj)
|
|
601
624
|
|
|
602
|
-
from .dataframe.arrays import ArrowDtype
|
|
603
|
-
|
|
604
625
|
def _is_fast_dtype(dtype):
|
|
605
626
|
if isinstance(dtype, np.dtype):
|
|
606
627
|
return np.issubdtype(dtype, np.number)
|
|
@@ -1182,13 +1203,16 @@ if pa:
|
|
|
1182
1203
|
"float": pa.float32,
|
|
1183
1204
|
"double": pa.float64,
|
|
1184
1205
|
"decimal": pa.decimal128,
|
|
1206
|
+
# repr() of date32 and date64 has `day` or `ms`
|
|
1207
|
+
# which is not needed in constructors
|
|
1208
|
+
"date32": lambda *_: pa.date32(),
|
|
1209
|
+
"date64": lambda *_: pa.date64(),
|
|
1185
1210
|
}
|
|
1186
1211
|
_plain_arrow_types = """
|
|
1187
1212
|
null
|
|
1188
1213
|
int8 int16 int32 int64
|
|
1189
1214
|
uint8 uint16 uint32 uint64
|
|
1190
1215
|
float16 float32 float64
|
|
1191
|
-
date32 date64
|
|
1192
1216
|
decimal128 decimal256
|
|
1193
1217
|
string utf8 binary
|
|
1194
1218
|
time32 time64 duration timestamp
|
|
@@ -1719,3 +1743,32 @@ def validate_and_adjust_resource_ratio(
|
|
|
1719
1743
|
)
|
|
1720
1744
|
|
|
1721
1745
|
return expect_resources, False
|
|
1746
|
+
|
|
1747
|
+
|
|
1748
|
+
def get_pd_option(option_name, default=no_default):
    """Look up a pandas option, falling back to ``default`` when it is unknown.

    ``FutureWarning`` emitted during the lookup is suppressed. When the lookup
    raises ``KeyError`` or ``AttributeError`` and no ``default`` was supplied,
    the error is re-raised.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        try:
            return pd.get_option(option_name)
        except (KeyError, AttributeError):
            if default is not no_default:
                return default
            raise
|
|
1758
|
+
|
|
1759
|
+
|
|
1760
|
+
@contextlib.contextmanager
def pd_option_context(*args):
    """Context manager like ``pd.option_context`` that skips unknown options.

    ``args`` is a flat ``name, value, name, value, ...`` sequence. Pairs whose
    option name cannot be looked up with ``get_pd_option`` are dropped; if no
    pair remains, the body runs without touching pandas options.
    """
    requested = dict(zip(args[0::2], args[1::2]))
    supported = []
    for name, value in requested.items():
        try:
            get_pd_option(name)
        except (KeyError, AttributeError):  # pragma: no cover
            continue
        supported += [name, value]

    if supported:
        with pd.option_context(*supported):
            yield
    else:  # pragma: no cover
        yield
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: maxframe
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.3.0rc1
|
|
4
4
|
Summary: MaxFrame operator-based data analyze framework
|
|
5
5
|
Requires-Dist: numpy<2.0.0,>=1.19.0
|
|
6
6
|
Requires-Dist: pandas>=1.0.0
|
|
@@ -107,4 +107,3 @@ License
|
|
|
107
107
|
|
|
108
108
|
Licensed under the `Apache License
|
|
109
109
|
2.0 <https://www.apache.org/licenses/LICENSE-2.0.html>`__.
|
|
110
|
-
|