maxframe 0.1.0b4__cp310-cp310-macosx_10_9_universal2.whl → 1.0.0__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cpython-310-darwin.so +0 -0
- maxframe/codegen.py +56 -5
- maxframe/config/config.py +78 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cpython-310-darwin.so +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +2 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +58 -12
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +120 -24
- maxframe/dataframe/datasource/read_odps_table.py +9 -4
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +6 -11
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +93 -1
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +4 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +33 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +110 -0
- maxframe/learn/contrib/xgboost/core.py +241 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
- maxframe/learn/contrib/xgboost/predict.py +121 -0
- maxframe/learn/contrib/xgboost/regressor.py +71 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +132 -0
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +11 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cpython-310-darwin.so +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +64 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +37 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +7 -2
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +4 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/misc/unique.py +205 -0
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +62 -3
- maxframe/utils.py +112 -86
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +123 -54
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +223 -40
- maxframe_client/session/task.py +108 -80
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +136 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -300
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
import xgboost
|
|
19
|
+
except ImportError:
|
|
20
|
+
xgboost = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
from ..... import tensor as mt
|
|
24
|
+
|
|
25
|
+
if xgboost:
|
|
26
|
+
from ..core import wrap_evaluation_matrices
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.mark.skipif(xgboost is None, reason="XGBoost not installed")
def test_wrap_evaluation_matrices():
    """Exercise both rejection paths and the success path of wrap_evaluation_matrices."""
    features = mt.random.rand(100, 3)
    labels = mt.random.randint(3, size=(100,))
    eval_features = mt.random.rand(10, 3)
    eval_labels = mt.random.randint(3, size=10)
    eval_set = [(eval_features, eval_labels)]

    # Leading positional arguments shared by every call below.
    common_args = (0.0, features, labels, None, None)

    # sample_weight_eval_set size wrong: an empty list against a one-entry eval_set
    with pytest.raises(ValueError):
        wrap_evaluation_matrices(*common_args, eval_set, [], None)

    # a value in the weight slot while the eval_set slot is None is also rejected
    with pytest.raises(ValueError):
        wrap_evaluation_matrices(*common_args, None, eval_set, None)

    wrapped = wrap_evaluation_matrices(*common_args, eval_set, None, None)
    evals = wrapped[1]
    assert len(evals) > 0
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from collections import OrderedDict
|
|
17
|
+
|
|
18
|
+
from .... import opcodes
|
|
19
|
+
from ....core import OutputType
|
|
20
|
+
from ....core.operator.base import Operator
|
|
21
|
+
from ....core.operator.core import TileableOperatorMixin
|
|
22
|
+
from ....serialization.serializables import (
|
|
23
|
+
AnyField,
|
|
24
|
+
BoolField,
|
|
25
|
+
DictField,
|
|
26
|
+
FieldTypes,
|
|
27
|
+
FunctionField,
|
|
28
|
+
Int64Field,
|
|
29
|
+
KeyField,
|
|
30
|
+
ListField,
|
|
31
|
+
)
|
|
32
|
+
from .core import Booster
|
|
33
|
+
from .dmatrix import ToDMatrix, to_dmatrix
|
|
34
|
+
|
|
35
|
+
logger = logging.getLogger(__name__)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _on_serialize_evals(evals_val):
|
|
39
|
+
if evals_val is None:
|
|
40
|
+
return None
|
|
41
|
+
return [list(x) for x in evals_val]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class XGBTrain(Operator, TileableOperatorMixin):
    """Operator describing an XGBoost training task.

    The operator takes the training DMatrix as its first input, followed by
    the DMatrix of every ``(dmatrix, name)`` pair in ``evals``. It yields one
    output when ``evals`` is empty or ``None`` and two outputs otherwise
    (see ``output_limit``).
    """

    _op_type_ = opcodes.XGBOOST_TRAIN

    params = DictField("params", key_type=FieldTypes.string, default=None)
    dtrain = KeyField("dtrain", default=None)
    # (dmatrix, name) pairs; each pair is converted to a list when serialized.
    evals = ListField("evals", on_serialize=_on_serialize_evals, default=None)
    obj = FunctionField("obj", default=None)
    # Tag now matches the attribute name; it used to duplicate the "obj" tag.
    feval = FunctionField("feval", default=None)
    maximize = BoolField("maximize", default=None)
    early_stopping_rounds = Int64Field("early_stopping_rounds", default=None)
    verbose_eval = AnyField("verbose_eval", default=None)
    xgb_model = AnyField("xgb_model", default=None)
    callbacks = ListField(
        "callbacks", field_type=FunctionField.field_type, default=None
    )
    custom_metric = FunctionField("custom_metric", default=None)
    num_boost_round = Int64Field("num_boost_round", default=10)
    num_class = Int64Field("num_class", default=None)

    def __init__(self, gpu=None, **kw):
        """Create the operator, filling in ``output_types`` when not supplied."""
        super().__init__(gpu=gpu, **kw)
        if self.output_types is None:
            self.output_types = [OutputType.object]
            if self.has_evals_result:
                # A second object output is declared when evals are present.
                self.output_types.append(OutputType.object)

    def _set_inputs(self, inputs):
        """Rebind ``dtrain`` and the DMatrix of each evals pair to the new inputs."""
        super()._set_inputs(inputs)
        self.dtrain = self._inputs[0]
        rest = self._inputs[1:]
        if self.has_evals_result:
            # Keep the evaluation names and swap in the new input objects as keys.
            names = OrderedDict(self.evals).values()
            self.evals = list(OrderedDict(zip(rest, names)).items())

    def __call__(self, evals_result):
        """Build the tileables for this operator and return the first one.

        ``evals_result`` is forwarded as a keyword argument to ``new_tileables``.
        """
        inputs = [self.dtrain]
        if self.has_evals_result:
            inputs.extend(e[0] for e in self.evals)
        return self.new_tileables(
            inputs, object_class=Booster, evals_result=evals_result
        )[0]

    @property
    def output_limit(self):
        """Number of outputs: 2 when evals are present, otherwise 1."""
        return 2 if self.has_evals_result else 1

    @property
    def has_evals_result(self) -> bool:
        """Whether ``evals`` is non-empty (``None`` and ``[]`` both give False)."""
        # Coerce to a real bool so the value matches the declared return type.
        return bool(self.evals)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def train(params, dtrain, evals=None, evals_result=None, num_class=None, **kwargs):
    """
    Train XGBoost model in MaxFrame manner.

    Parameters
    ----------
    Parameters are the same as `xgboost.train`. Note that train is an eager-execution
    API if evals is passed, thus the call will be blocked until training finishes.

    Two keyword arguments are consumed here instead of being forwarded to the
    operator: ``session`` and ``run_kwargs``. Both are only used for the
    ``execute`` call made when ``evals`` is given.

    Returns
    -------
    results: Booster

    Raises
    ------
    TypeError
        If the second item of an ``evals`` pair is not a string.
    """

    evals_result = evals_result if evals_result is not None else dict()
    session = kwargs.pop("session", None)
    # An explicit run_kwargs=None is treated as "no extra arguments", since
    # None cannot be unpacked with ** below.
    run_kwargs = kwargs.pop("run_kwargs", None) or dict()

    processed_evals = []
    for eval_dmatrix, name in evals or ():
        if not isinstance(name, str):
            raise TypeError("evals must be a list of pairs (DMatrix, string)")
        is_dmatrix = hasattr(eval_dmatrix, "op") and isinstance(
            eval_dmatrix.op, ToDMatrix
        )
        if not is_dmatrix:
            # Anything not already produced by ToDMatrix is converted first.
            eval_dmatrix = to_dmatrix(eval_dmatrix)
        processed_evals.append((eval_dmatrix, name))

    data = XGBTrain(
        params=params,
        dtrain=dtrain,
        evals=processed_evals,
        evals_result=evals_result,
        num_class=num_class,
        **kwargs,
    )(evals_result)
    return data.execute(session=session, **run_kwargs) if evals else data
|
|
@@ -12,18 +12,15 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from
|
|
16
|
-
from ...serialization.serializables import ReferenceField
|
|
17
|
-
from ..graph import ChunkGraph
|
|
18
|
-
from .base import Operator
|
|
15
|
+
from ..core.entity.objects import Object, ObjectData
|
|
19
16
|
|
|
20
17
|
|
|
21
|
-
class
|
|
22
|
-
|
|
23
|
-
_op_type_ = opcodes.FUSE
|
|
18
|
+
class ModelData(ObjectData):
    """ObjectData subclass for models; defines no members of its own."""
|
|
24
20
|
|
|
25
|
-
fuse_graph = ReferenceField("fuse_graph", ChunkGraph)
|
|
26
21
|
|
|
22
|
+
class Model(Object):
    """Object subclass for models; defines no members of its own."""
|
|
27
24
|
|
|
28
|
-
|
|
29
|
-
|
|
25
|
+
|
|
26
|
+
# Tuple pairing the Model entity class with its ModelData counterpart.
MODEL_TYPE = (Model, ModelData)
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from .core import convert_to_tensor_or_dataframe
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
from ...dataframe import DataFrame, Series
|
|
18
|
+
from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
|
|
19
|
+
from ...tensor import tensor as astensor
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def convert_to_tensor_or_dataframe(item):
    """Wrap ``item`` with ``DataFrame``, ``Series`` or ``astensor`` based on its type.

    DataFrame-like inputs (``DATAFRAME_TYPE`` or ``pd.DataFrame``) go through
    ``DataFrame``; series-like inputs (``SERIES_TYPE`` or ``pd.Series``) go
    through ``Series``; everything else is handed to ``astensor``.
    """
    if isinstance(item, (DATAFRAME_TYPE, pd.DataFrame)):
        return DataFrame(item)
    if isinstance(item, (SERIES_TYPE, pd.Series)):
        return Series(item)
    return astensor(item)
|
|
Binary file
|
maxframe/lib/mmh3.pyi
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright 1999-2024 Alibaba Group Holding Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
|
|
17
|
+
def hash(key, seed=0, signed=True) -> int:
    """
    Return a 32 bit integer hash of ``key``.
    """
    ...
|
|
21
|
+
|
|
22
|
+
def hash_from_buffer(key, seed=0, signed=True) -> int:
    """
    Return a 32 bit integer hash of ``key``.

    Designed for large memory-views such as numpy arrays.
    """
    ...
|
|
26
|
+
|
|
27
|
+
def hash64(key, seed=0, x64arch=True, signed=True) -> Tuple[int, int]:
    """
    Return a tuple of two 64 bit integers for a string.

    The computation is optimized for the x64 bit architecture when
    ``x64arch=True`` and for x86 otherwise.
    """
    ...
|
|
32
|
+
|
|
33
|
+
def hash128(key, seed=0, x64arch=True, signed=False) -> int:
    """
    Return a 128 bit long integer.

    The computation is optimized for the x64 bit architecture when
    ``x64arch=True`` and for x86 otherwise.
    """
    ...
|
|
38
|
+
|
|
39
|
+
def hash_bytes(key, seed=0, x64arch=True) -> bytes:
    """
    Return a 128 bit hash value as bytes for a string.

    The computation is optimized for the x64 bit architecture when
    ``x64arch=True`` and for x86 otherwise.
    """
    ...
|
|
@@ -55,13 +55,13 @@ def test_sparse_creation():
|
|
|
55
55
|
s = SparseNDArray(s1_data)
|
|
56
56
|
assert s.ndim == 2
|
|
57
57
|
assert isinstance(s, SparseMatrix)
|
|
58
|
-
assert_array_equal(s.toarray(), s1_data.
|
|
59
|
-
assert_array_equal(s.todense(), s1_data.
|
|
58
|
+
assert_array_equal(s.toarray(), s1_data.toarray())
|
|
59
|
+
assert_array_equal(s.todense(), s1_data.toarray())
|
|
60
60
|
|
|
61
61
|
ss = pickle.loads(pickle.dumps(s))
|
|
62
62
|
assert s == ss
|
|
63
|
-
assert_array_equal(ss.toarray(), s1_data.
|
|
64
|
-
assert_array_equal(ss.todense(), s1_data.
|
|
63
|
+
assert_array_equal(ss.toarray(), s1_data.toarray())
|
|
64
|
+
assert_array_equal(ss.todense(), s1_data.toarray())
|
|
65
65
|
|
|
66
66
|
v = SparseNDArray(v1, shape=(3,))
|
|
67
67
|
assert s.ndim
|
|
@@ -331,12 +331,12 @@ def test_sparse_dot():
|
|
|
331
331
|
|
|
332
332
|
assert_array_equal(mls.dot(s1, v1_s), s1.dot(v1_data))
|
|
333
333
|
assert_array_equal(mls.dot(s2, v1_s), s2.dot(v1_data))
|
|
334
|
-
assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.
|
|
335
|
-
assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.
|
|
334
|
+
assert_array_equal(mls.dot(v2_s, s1), v2_data.dot(s1_data.toarray()))
|
|
335
|
+
assert_array_equal(mls.dot(v2_s, s2), v2_data.dot(s2_data.toarray()))
|
|
336
336
|
assert_array_equal(mls.dot(v1_s, v1_s), v1_data.dot(v1_data), almost=True)
|
|
337
337
|
assert_array_equal(mls.dot(v2_s, v2_s), v2_data.dot(v2_data), almost=True)
|
|
338
338
|
|
|
339
|
-
assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.
|
|
339
|
+
assert_array_equal(mls.dot(v2_s, s1, sparse=False), v2_data.dot(s1_data.toarray()))
|
|
340
340
|
assert_array_equal(mls.dot(v1_s, v1_s, sparse=False), v1_data.dot(v1_data))
|
|
341
341
|
|
|
342
342
|
|
|
@@ -390,7 +390,7 @@ def test_sparse_fill_diagonal():
|
|
|
390
390
|
arr = SparseNDArray(s1)
|
|
391
391
|
arr.fill_diagonal(3)
|
|
392
392
|
|
|
393
|
-
expected = s1.copy().
|
|
393
|
+
expected = s1.copy().toarray()
|
|
394
394
|
np.fill_diagonal(expected, 3)
|
|
395
395
|
|
|
396
396
|
np.testing.assert_array_equal(arr.toarray(), expected)
|
|
@@ -399,7 +399,7 @@ def test_sparse_fill_diagonal():
|
|
|
399
399
|
arr = SparseNDArray(s1)
|
|
400
400
|
arr.fill_diagonal(3, wrap=True)
|
|
401
401
|
|
|
402
|
-
expected = s1.copy().
|
|
402
|
+
expected = s1.copy().toarray()
|
|
403
403
|
np.fill_diagonal(expected, 3, wrap=True)
|
|
404
404
|
|
|
405
405
|
np.testing.assert_array_equal(arr.toarray(), expected)
|
|
@@ -408,7 +408,7 @@ def test_sparse_fill_diagonal():
|
|
|
408
408
|
arr = SparseNDArray(s1)
|
|
409
409
|
arr.fill_diagonal([1, 2, 3])
|
|
410
410
|
|
|
411
|
-
expected = s1.copy().
|
|
411
|
+
expected = s1.copy().toarray()
|
|
412
412
|
np.fill_diagonal(expected, [1, 2, 3])
|
|
413
413
|
|
|
414
414
|
np.testing.assert_array_equal(arr.toarray(), expected)
|
|
@@ -417,7 +417,7 @@ def test_sparse_fill_diagonal():
|
|
|
417
417
|
arr = SparseNDArray(s1)
|
|
418
418
|
arr.fill_diagonal([1, 2, 3], wrap=True)
|
|
419
419
|
|
|
420
|
-
expected = s1.copy().
|
|
420
|
+
expected = s1.copy().toarray()
|
|
421
421
|
np.fill_diagonal(expected, [1, 2, 3], wrap=True)
|
|
422
422
|
|
|
423
423
|
np.testing.assert_array_equal(arr.toarray(), expected)
|
|
@@ -427,7 +427,7 @@ def test_sparse_fill_diagonal():
|
|
|
427
427
|
arr = SparseNDArray(s1)
|
|
428
428
|
arr.fill_diagonal(val)
|
|
429
429
|
|
|
430
|
-
expected = s1.copy().
|
|
430
|
+
expected = s1.copy().toarray()
|
|
431
431
|
np.fill_diagonal(expected, val)
|
|
432
432
|
|
|
433
433
|
np.testing.assert_array_equal(arr.toarray(), expected)
|
|
@@ -437,7 +437,7 @@ def test_sparse_fill_diagonal():
|
|
|
437
437
|
arr = SparseNDArray(s1)
|
|
438
438
|
arr.fill_diagonal(val, wrap=True)
|
|
439
439
|
|
|
440
|
-
expected = s1.copy().
|
|
440
|
+
expected = s1.copy().toarray()
|
|
441
441
|
np.fill_diagonal(expected, val, wrap=True)
|
|
442
442
|
|
|
443
443
|
np.testing.assert_array_equal(arr.toarray(), expected)
|
|
@@ -447,7 +447,7 @@ def test_sparse_fill_diagonal():
|
|
|
447
447
|
arr = SparseNDArray(s1)
|
|
448
448
|
arr.fill_diagonal(val)
|
|
449
449
|
|
|
450
|
-
expected = s1.copy().
|
|
450
|
+
expected = s1.copy().toarray()
|
|
451
451
|
np.fill_diagonal(expected, val)
|
|
452
452
|
|
|
453
453
|
np.testing.assert_array_equal(arr.toarray(), expected)
|
|
@@ -457,7 +457,7 @@ def test_sparse_fill_diagonal():
|
|
|
457
457
|
arr = SparseNDArray(s1)
|
|
458
458
|
arr.fill_diagonal(val, wrap=True)
|
|
459
459
|
|
|
460
|
-
expected = s1.copy().
|
|
460
|
+
expected = s1.copy().toarray()
|
|
461
461
|
np.fill_diagonal(expected, val, wrap=True)
|
|
462
462
|
|
|
463
463
|
np.testing.assert_array_equal(arr.toarray(), expected)
|
maxframe/lib/wrapped_pickle.py
CHANGED
|
@@ -120,7 +120,8 @@ class _UnpickleSwitch:
|
|
|
120
120
|
@functools.wraps(func)
|
|
121
121
|
async def wrapped(*args, **kwargs):
|
|
122
122
|
with _UnpickleSwitch(forbidden=self._forbidden):
|
|
123
|
-
|
|
123
|
+
ret = await func(*args, **kwargs)
|
|
124
|
+
return ret
|
|
124
125
|
|
|
125
126
|
else:
|
|
126
127
|
|
maxframe/opcodes.py
CHANGED
|
@@ -386,9 +386,16 @@ DATAFRAME_EVAL = 738
|
|
|
386
386
|
DUPLICATED = 739
|
|
387
387
|
DELETE = 740
|
|
388
388
|
ALIGN = 741
|
|
389
|
+
CASE_WHEN = 742
|
|
390
|
+
PIVOT = 743
|
|
391
|
+
PIVOT_TABLE = 744
|
|
389
392
|
|
|
390
393
|
FUSE = 801
|
|
391
394
|
|
|
395
|
+
# LLM
|
|
396
|
+
DASHSCOPE_TEXT_GENERATION = 810
|
|
397
|
+
DASHSCOPE_MULTI_MODAL_GENERATION = 811
|
|
398
|
+
|
|
392
399
|
# table like input for tensor
|
|
393
400
|
TABLE_COO = 1003
|
|
394
401
|
# store tensor as coo format
|
|
@@ -529,6 +536,8 @@ STATSMODELS_TRAIN = 3012
|
|
|
529
536
|
STATSMODELS_PREDICT = 3013
|
|
530
537
|
|
|
531
538
|
# learn
|
|
539
|
+
CONNECTED_COMPONENTS = 3100
|
|
540
|
+
|
|
532
541
|
# checks
|
|
533
542
|
CHECK_NON_NEGATIVE = 3300
|
|
534
543
|
# classifier check targets
|
|
@@ -563,6 +572,8 @@ CHOLESKY_FUSE = 999988
|
|
|
563
572
|
|
|
564
573
|
# MaxFrame-dedicated functions
|
|
565
574
|
DATAFRAME_RESHUFFLE = 10001
|
|
575
|
+
FLATMAP = 10002
|
|
576
|
+
FLATJSON = 10003
|
|
566
577
|
|
|
567
578
|
# MaxFrame internal operators
|
|
568
579
|
DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
|