maxframe 1.2.1-cp38-cp38-win32.whl → 1.3.1-cp38-cp38-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: the registry flags this version of maxframe as possibly problematic.

Files changed (73)
  1. maxframe/_utils.cp38-win32.pyd +0 -0
  2. maxframe/codegen.py +70 -21
  3. maxframe/config/config.py +6 -0
  4. maxframe/core/accessor.py +1 -0
  5. maxframe/core/graph/core.cp38-win32.pyd +0 -0
  6. maxframe/dataframe/accessors/__init__.py +1 -1
  7. maxframe/dataframe/accessors/dict_/accessor.py +1 -0
  8. maxframe/dataframe/accessors/dict_/length.py +1 -0
  9. maxframe/dataframe/accessors/dict_/setitem.py +1 -0
  10. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +5 -7
  11. maxframe/dataframe/accessors/list_/__init__.py +37 -0
  12. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  13. maxframe/dataframe/accessors/list_/getitem.py +135 -0
  14. maxframe/dataframe/accessors/list_/length.py +73 -0
  15. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  16. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +79 -0
  17. maxframe/dataframe/accessors/plotting/__init__.py +2 -0
  18. maxframe/dataframe/accessors/string_/__init__.py +1 -0
  19. maxframe/dataframe/datastore/to_odps.py +6 -0
  20. maxframe/dataframe/extensions/accessor.py +1 -0
  21. maxframe/dataframe/extensions/apply_chunk.py +34 -21
  22. maxframe/dataframe/extensions/flatmap.py +8 -1
  23. maxframe/dataframe/extensions/tests/test_apply_chunk.py +2 -1
  24. maxframe/dataframe/extensions/tests/test_extensions.py +1 -0
  25. maxframe/dataframe/groupby/aggregation.py +53 -1
  26. maxframe/dataframe/merge/concat.py +7 -4
  27. maxframe/dataframe/merge/merge.py +1 -0
  28. maxframe/dataframe/merge/tests/test_merge.py +97 -47
  29. maxframe/dataframe/missing/tests/test_missing.py +1 -0
  30. maxframe/dataframe/reduction/aggregation.py +63 -0
  31. maxframe/dataframe/reduction/core.py +17 -5
  32. maxframe/dataframe/tests/test_utils.py +7 -0
  33. maxframe/dataframe/ufunc/ufunc.py +1 -0
  34. maxframe/dataframe/utils.py +3 -0
  35. maxframe/io/odpsio/schema.py +1 -0
  36. maxframe/learn/contrib/__init__.py +2 -4
  37. maxframe/learn/contrib/llm/__init__.py +1 -0
  38. maxframe/learn/contrib/llm/core.py +31 -10
  39. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  40. maxframe/learn/contrib/llm/models/dashscope.py +38 -3
  41. maxframe/learn/contrib/llm/models/managed.py +54 -0
  42. maxframe/learn/contrib/llm/multi_modal.py +93 -0
  43. maxframe/learn/contrib/llm/text.py +268 -8
  44. maxframe/learn/contrib/models.py +77 -0
  45. maxframe/learn/contrib/utils.py +1 -0
  46. maxframe/learn/contrib/xgboost/__init__.py +8 -1
  47. maxframe/learn/contrib/xgboost/classifier.py +15 -4
  48. maxframe/learn/contrib/xgboost/core.py +108 -1
  49. maxframe/learn/contrib/xgboost/dmatrix.py +1 -1
  50. maxframe/learn/contrib/xgboost/predict.py +6 -3
  51. maxframe/learn/contrib/xgboost/regressor.py +15 -1
  52. maxframe/learn/contrib/xgboost/train.py +5 -4
  53. maxframe/lib/dtypes_extension/__init__.py +2 -1
  54. maxframe/lib/dtypes_extension/dtypes.py +21 -0
  55. maxframe/lib/dtypes_extension/tests/test_dtypes.py +13 -3
  56. maxframe/lib/mmh3.cp38-win32.pyd +0 -0
  57. maxframe/opcodes.py +19 -0
  58. maxframe/serialization/__init__.py +1 -0
  59. maxframe/serialization/core.cp38-win32.pyd +0 -0
  60. maxframe/serialization/core.pyx +12 -1
  61. maxframe/serialization/numpy.py +12 -4
  62. maxframe/serialization/serializables/tests/test_serializable.py +13 -2
  63. maxframe/serialization/tests/test_serial.py +2 -0
  64. maxframe/tensor/merge/concatenate.py +1 -0
  65. maxframe/tensor/misc/unique.py +11 -10
  66. maxframe/tensor/reshape/reshape.py +4 -1
  67. maxframe/utils.py +4 -0
  68. {maxframe-1.2.1.dist-info → maxframe-1.3.1.dist-info}/METADATA +2 -1
  69. {maxframe-1.2.1.dist-info → maxframe-1.3.1.dist-info}/RECORD +73 -65
  70. {maxframe-1.2.1.dist-info → maxframe-1.3.1.dist-info}/WHEEL +1 -1
  71. maxframe_client/session/odps.py +3 -0
  72. maxframe_client/session/tests/test_task.py +1 -0
  73. {maxframe-1.2.1.dist-info → maxframe-1.3.1.dist-info}/top_level.txt +0 -0
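
Items 11-16 add a new list_ accessor package alongside the existing dict_ one. The authoritative API lives in the new test file (test_list_accessor.py); the sketch below is only inferred from the new file names (length.py, getitem.py) and the dict accessor's pattern, so the method names are assumptions:

import pandas as pd
import pyarrow as pa

import maxframe.dataframe as md

# Hypothetical usage of the new Series.list accessor; verify against
# maxframe/dataframe/accessors/list_/tests/test_list_accessor.py.
s = md.Series(
    [[1, 2], [3, 4, 5]],
    dtype=pd.ArrowDtype(pa.list_(pa.int64())),  # assumes pandas>=1.5 ArrowDtype
)
s.list.len()  # element-wise list length (length.py)
s.list[0]     # element-wise item lookup (getitem.py)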
@@ -16,10 +16,10 @@ import numpy as np
 import pandas as pd
 import pytest
 
+from .... import dataframe as md
 from ....tests.utils import assert_mf_index_dtype
 from ...core import IndexValue
-from ...datasource.dataframe import from_pandas
-from .. import DataFrameMerge, concat
+from .. import DataFrameMerge
 from ..merge import DistributedMapJoinHint, MapJoinHint, SkewJoinHint
 
 
@@ -29,8 +29,8 @@ def test_merge():
     )
     df2 = pd.DataFrame(np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"])
 
-    mdf1 = from_pandas(df1, chunk_size=2)
-    mdf2 = from_pandas(df2, chunk_size=3)
+    mdf1 = md.DataFrame(df1, chunk_size=2)
+    mdf2 = md.DataFrame(df2, chunk_size=3)
 
     mapjoin = MapJoinHint()
     dist_mapjoin1 = DistributedMapJoinHint(shard_count=5)
@@ -83,8 +83,8 @@ def test_merge_invalid_parameters():
     )
     pdf2 = pd.DataFrame(np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"])
 
-    df1 = from_pandas(pdf1, chunk_size=2)
-    df2 = from_pandas(pdf2, chunk_size=3)
+    df1 = md.DataFrame(pdf1, chunk_size=2)
+    df2 = md.DataFrame(pdf2, chunk_size=3)
 
     with pytest.raises(ValueError):
         df1.merge(df2, bloom_filter="wrong")
@@ -104,8 +104,8 @@ def test_join():
     df2 = pd.DataFrame([[1, 2, 3], [1, 5, 6], [7, 8, 9]], index=["a1", "b2", "b3"]) + 1
     df2 = pd.concat([df2, df2 + 1])
 
-    mdf1 = from_pandas(df1, chunk_size=2)
-    mdf2 = from_pandas(df2, chunk_size=2)
+    mdf1 = md.DataFrame(df1, chunk_size=2)
+    mdf2 = md.DataFrame(df2, chunk_size=2)
 
     parameters = [
         {"lsuffix": "l_", "rsuffix": "r_"},
@@ -132,8 +132,8 @@ def test_join_on():
     )
     df2 = pd.concat([df2, df2 + 1])
 
-    mdf1 = from_pandas(df1, chunk_size=2)
-    mdf2 = from_pandas(df2, chunk_size=2)
+    mdf1 = md.DataFrame(df1, chunk_size=2)
+    mdf2 = md.DataFrame(df2, chunk_size=2)
 
     parameters = [
         {"lsuffix": "l_", "rsuffix": "r_"},
@@ -157,15 +157,15 @@ def test_append():
     df1 = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
     df2 = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
 
-    mdf1 = from_pandas(df1, chunk_size=3)
-    mdf2 = from_pandas(df2, chunk_size=3)
+    mdf1 = md.DataFrame(df1, chunk_size=3)
+    mdf2 = md.DataFrame(df2, chunk_size=3)
     adf = mdf1.append(mdf2)
 
     assert adf.shape == (20, 4)
     assert_mf_index_dtype(adf.index_value.value, np.int64)
 
-    mdf1 = from_pandas(df1, chunk_size=3)
-    mdf2 = from_pandas(df2, chunk_size=3)
+    mdf1 = md.DataFrame(df1, chunk_size=3)
+    mdf2 = md.DataFrame(df2, chunk_size=3)
     adf = mdf1.append(mdf2, ignore_index=True)
 
     assert adf.shape == (20, 4)
@@ -173,84 +173,135 @@ def test_append():
     pd.testing.assert_index_equal(adf.index_value.to_pandas(), pd.RangeIndex(20))
 
 
-def test_concat():
+def test_concat_dataframe():
+    # test index concatenate
     df1 = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
     df2 = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
 
-    mdf1 = from_pandas(df1, chunk_size=4)
-    mdf2 = from_pandas(df2, chunk_size=4)
-    r = concat([mdf1, mdf2], axis="index")
+    mdf1 = md.DataFrame(df1, chunk_size=4)
+    mdf2 = md.DataFrame(df2, chunk_size=4)
+    r = md.concat([mdf1, mdf2], axis="index")
 
     assert r.shape == (20, 4)
     assert not isinstance(r.index_value.to_pandas(), pd.RangeIndex)
-    pd.testing.assert_series_equal(r.dtypes, df1.dtypes)
+    pd.testing.assert_series_equal(r.dtypes, mdf1.dtypes)
 
-    df3 = pd.DataFrame(
-        np.random.rand(10, 4), columns=list("ABCD"), index=pd.RangeIndex(10, 20)
+    # test index concatenate with range index
+    mdf3 = md.DataFrame(
+        np.random.rand(10, 4),
+        columns=list("ABCD"),
+        index=pd.RangeIndex(10, 20),
+        chunk_size=4,
     )
-
-    mdf3 = from_pandas(df3, chunk_size=4)
-    r = concat([mdf1, mdf3], axis="index")
+    r = md.concat([mdf1, mdf3], axis="index")
 
     assert r.shape == (20, 4)
-    pd.testing.assert_series_equal(r.dtypes, df1.dtypes)
+    pd.testing.assert_series_equal(r.dtypes, mdf1.dtypes)
     pd.testing.assert_index_equal(r.index_value.to_pandas(), pd.RangeIndex(20))
 
+    # test index concatenate with perm index
     df4 = pd.DataFrame(
         np.random.rand(10, 4),
         columns=list("ABCD"),
         index=np.random.permutation(np.arange(10)),
     )
 
-    mdf4 = from_pandas(df4, chunk_size=4)
-    r = concat([mdf1, mdf4], axis="index")
+    # test concat with same index with different sources
+    mdf4 = md.DataFrame(df4, chunk_size=4)
+    r = md.concat([mdf1, mdf4], axis="index")
 
     assert r.shape == (20, 4)
-    pd.testing.assert_series_equal(r.dtypes, df1.dtypes)
+    pd.testing.assert_series_equal(r.dtypes, mdf1.dtypes)
     pd.testing.assert_index_equal(
         r.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
 
-    r = concat([mdf4, mdf1], axis="index")
+    r = md.concat([mdf4, mdf1], axis="index")
 
     assert r.shape == (20, 4)
-    pd.testing.assert_series_equal(r.dtypes, df1.dtypes)
+    pd.testing.assert_series_equal(r.dtypes, mdf1.dtypes)
     pd.testing.assert_index_equal(
         r.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
 
-    r = concat([mdf4, mdf4], axis="index")
+    # test concat with same index with same source
+    r = md.concat([mdf4, mdf4], axis="index")
 
     assert r.shape == (20, 4)
-    pd.testing.assert_series_equal(r.dtypes, df1.dtypes)
+    pd.testing.assert_series_equal(r.dtypes, mdf1.dtypes)
     pd.testing.assert_index_equal(
         r.index_value.to_pandas(), pd.Index([], dtype=np.int64)
     )
 
-    mdf1 = from_pandas(df1, chunk_size=3)
-    mdf2 = from_pandas(df2, chunk_size=4)
-    r = concat([mdf1, mdf2], axis="columns")
+    # test concat with column outer join
+    mdf1 = md.DataFrame(df1, chunk_size=3)
+    mdf2 = md.DataFrame(df2, chunk_size=4)
+    r = md.concat([mdf1, mdf2], axis="columns")
 
     assert r.shape == (10, 8)
     expected_dtypes = pd.concat([df1, df2], axis="columns").dtypes
     pd.testing.assert_series_equal(r.dtypes, expected_dtypes)
 
-    df1 = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
-    df2 = pd.DataFrame(np.random.rand(10, 3), columns=list("ABC"))
-    mdf1 = from_pandas(df1, chunk_size=3)
-    mdf2 = from_pandas(df2, chunk_size=3)
-    r = concat([mdf1, mdf2], join="inner")
+    # test concat with column inner join
+    mdf1 = md.DataFrame(np.random.rand(10, 4), columns=list("ABCD"), chunk_size=3)
+    mdf2 = md.DataFrame(np.random.rand(10, 3), columns=list("ABC"), chunk_size=3)
+    r = md.concat([mdf1, mdf2], join="inner")
     assert r.shape == (20, 3)
 
+    # test concat with ignore index
+    r = md.concat([mdf1, mdf2], join="inner", ignore_index=True)
+    assert r.shape == (20, 3)
+    pd.testing.assert_index_equal(r.index_value.to_pandas(), pd.RangeIndex(20))
+
+    # test concat with unknown shapes
+    mdf1._shape = (np.nan, 4)
+    r = md.concat([mdf1, mdf2], join="inner", ignore_index=True)
+    np.testing.assert_array_equal(np.array(r.shape), np.array((np.nan, 3)))
+    r = md.concat([mdf1, mdf2], join="inner", ignore_index=True)
+    np.testing.assert_array_equal(np.array(r.shape), np.array((np.nan, 3)))
+
+    # test concat with empty frames
+    r = md.concat([md.DataFrame([]), mdf2], ignore_index=True)
+    assert r.shape == (10, 3)
+
+
+def test_concat_series():
+    # test row concat
+    ms1 = md.Series(np.random.rand(10))
+    ms2 = md.Series(np.random.rand(10))
+    r = md.concat([ms1, ms2])
+    assert r.shape == (20,)
+
+    # test row concat with unknown shape
+    ms1._shape = (np.nan,)
+    r = md.concat([ms1, ms2])
+    assert np.isnan(r.shape[0])
+    r = md.concat([ms1, ms2], ignore_index=True)
+    assert np.isnan(r.shape[0])
+
+    # test col concat
+    ms1 = md.Series(np.random.rand(10))
+    ms2 = md.Series(np.random.rand(10))
+    r = md.concat([ms1, ms2], axis=1)
+    assert r.shape == (10, 2)
+
+    # test col concat with names
+    ms1.name = "col1"
+    ms2.name = "col2"
+    r = md.concat([ms1, ms2], axis=1)
+    assert r.shape == (10, 2)
+    assert r.dtypes.index.tolist() == ["col1", "col2"]
+
 
 def test_invalid_join_hint():
-    df1 = pd.DataFrame(
-        np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"]
+    mdf1 = md.DataFrame(
+        np.arange(20).reshape((4, 5)) + 1,
+        columns=["a", "b", "c", "d", "e"],
+        chunk_size=2,
+    )
+    mdf2 = md.DataFrame(
+        np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"], chunk_size=3
     )
-    df2 = pd.DataFrame(np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"])
-
-    mdf1 = from_pandas(df1, chunk_size=2)
-    mdf2 = from_pandas(df2, chunk_size=3)
 
     # type error
     parameters = [
@@ -282,7 +333,6 @@ def test_invalid_join_hint():
     ]
 
     for kw in parameters:
-        print(kw)
         with pytest.raises(TypeError):
             mdf1.merge(mdf2, **kw)
 
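The dominant change in the test_merge.py hunks above is mechanical: every test now builds MaxFrame objects through the public initializers instead of the internal from_pandas helper, and concat is reached as md.concat. As a minimal sketch of the equivalence (grounded in the import hunk at the top of the file):

import numpy as np
import pandas as pd

import maxframe.dataframe as md

pdf = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))

# 1.2.x test style (internal helper, removed in 1.3.1):
#   from maxframe.dataframe.datasource.dataframe import from_pandas
#   mdf = from_pandas(pdf, chunk_size=4)

# 1.3.x test style (public initializer, as the updated tests use):
mdf = md.DataFrame(pdf, chunk_size=4)
r = md.concat([mdf, mdf], axis="index", ignore_index=True)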
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import random
 
 import numpy as np
@@ -315,6 +315,69 @@ def compile_reduction_funcs(op: DataFrameAggregate, input: TileableType):
 
 
 def aggregate(df, func=None, axis=0, **kw):
+    """
+    Aggregate using one or more operations over the specified axis.
+
+    Parameters
+    ----------
+    df : DataFrame, Series
+        Object to aggregate.
+    func : list or dict
+        Function to use for aggregating the data.
+    axis : {0 or 'index', 1 or 'columns'}, default 0
+        If 0 or 'index': apply function to each column. If 1 or 'columns': apply function to each row.
+    kw
+        Keyword arguments to pass to func.
+
+    Returns
+    -------
+    scalar, Series or DataFrame
+        The return can be:
+
+        * scalar : when Series.agg is called with single function
+        * Series : when DataFrame.agg is called with a single function
+        * DataFrame : when DataFrame.agg is called with several functions
+
+    Examples
+    --------
+    >>> import maxframe.dataframe as md
+    >>> df = md.DataFrame([[1, 2, 3],
+    ...                    [4, 5, 6],
+    ...                    [7, 8, 9],
+    ...                    [np.nan, np.nan, np.nan]],
+    ...                   columns=['A', 'B', 'C']).execute()
+
+    Aggregate these functions over the rows.
+
+    >>> df.agg(['sum', 'min']).execute()
+            A     B     C
+    min   1.0   2.0   3.0
+    sum  12.0  15.0  18.0
+
+    Different aggregations per column.
+
+    >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}).execute()
+            A    B
+    max   NaN  8.0
+    min   1.0  2.0
+    sum  12.0  NaN
+
+    Aggregate different functions over the columns and rename the index of the resulting DataFrame.
+
+    >>> df.agg(x=('A', 'max'), y=('B', 'min'), z=('C', 'mean')).execute()
+         A    B    C
+    x  7.0  NaN  NaN
+    y  NaN  2.0  NaN
+    z  NaN  NaN  6.0
+
+    >>> s = md.Series([1, 2, 3, 4])
+    >>> s.agg('min').execute()
+    1
+
+    >>> s.agg(['min', 'max']).execute()
+    max    4
+    min    1
+    """
     axis = validate_axis(axis, df)
     if (
         df.ndim == 2
@@ -404,6 +404,7 @@ class ReductionPostStep(NamedTuple):
     func_name: str
     columns: Optional[List[str]]
    func_idl: bytes
+    post_func_aliases: Optional[List[str]] = None
 
 
 class ReductionSteps(NamedTuple):
@@ -462,6 +463,7 @@ class ReductionCompiler:
         self._output_key_to_agg_steps = dict()
         self._output_key_to_post_steps = dict()
         self._output_key_to_post_cols = dict()
+        self._output_key_to_col_func_mapping = dict()
 
     @classmethod
     def _check_function_valid(cls, func):
@@ -531,6 +533,14 @@ class ReductionCompiler:
         self._output_key_to_post_steps[step.output_key] = step
         self._update_col_dict(self._output_key_to_post_cols, step.output_key, cols)
 
+        if cols is not None:
+            col_name_map = (
+                self._output_key_to_col_func_mapping.get(step.output_key) or {}
+            )
+            for col in cols:
+                col_name_map[col] = func_name
+            self._output_key_to_col_func_mapping[step.output_key] = col_name_map
+
     @staticmethod
     def _build_mock_return_object(func, input_dtype, ndim):
         from ..initializer import DataFrame as MaxDataFrame
@@ -812,11 +822,12 @@ class ReductionCompiler:
             agg_funcs.append(step)
 
         for key, step in self._output_key_to_post_steps.items():
-            cols = self._output_key_to_post_cols[key]
-            if cols and set(cols) == set(referred_cols):
-                post_cols = None
-            else:
-                post_cols = cols
+            post_cols = self._output_key_to_post_cols[key]
+            func_renames = None
+            if post_cols:
+                col_map = self._output_key_to_col_func_mapping.get(key)
+                if col_map:
+                    func_renames = [col_map[c] for c in post_cols]
 
             func_name = step.func_name
             if self._lambda_counter == 1 and step.func_name == "<lambda_0>":
@@ -831,6 +842,7 @@ class ReductionCompiler:
                     func_name,
                    post_cols,
                    step.func_idl,
+                    func_renames,
                 )
             )
 
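Taken together, the reduction/core.py hunks thread per-column function names through compilation: the registration hunk records which aggregation function produced each post-step column, and the compile hunks replay that map, in column order, into the new post_func_aliases field of ReductionPostStep. A standalone sketch of just that bookkeeping, with the surrounding compiler machinery omitted (names mirror the diff):

from typing import Dict, List, Optional

# Map each output key to a {column -> producing function name} dict,
# as _output_key_to_col_func_mapping does in the diff above.
col_func_mapping: Dict[str, Dict[str, str]] = {}

def record_step(output_key: str, cols: Optional[List[str]], func_name: str) -> None:
    if cols is not None:
        col_map = col_func_mapping.get(output_key) or {}
        for col in cols:
            col_map[col] = func_name
        col_func_mapping[output_key] = col_map

def func_renames(output_key: str, post_cols: Optional[List[str]]) -> Optional[List[str]]:
    # Emit the function names in column order, as compile() now does
    # when building ReductionPostStep.post_func_aliases.
    if post_cols:
        col_map = col_func_mapping.get(output_key)
        if col_map:
            return [col_map[c] for c in post_cols]
    return None

record_step("k1", ["a", "b"], "sum")
assert func_renames("k1", ["a", "b"]) == ["sum", "sum"]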
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import numpy as np
 import pandas as pd
 import pyarrow as pa
@@ -71,6 +72,12 @@ def test_pack_function(df1):
 @pytest.mark.parametrize(
     "dtype, fill_value, expected",
     [
+        (
+            ArrowDtype(pa.list_(pa.string())) if ArrowDtype else None,
+            1,
+            ["1"],
+        ),
+        (pa.list_(pa.string()), 1, ["1"]),
         (
             ArrowDtype(pa.map_(pa.int32(), pa.string())) if ArrowDtype else None,
             1,
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from numbers import Number
 
 from ...tensor import tensor as astensor
@@ -463,6 +463,9 @@ def _generate_value(dtype, fill_value):
     if ArrowDtype and isinstance(dtype, pd.ArrowDtype):
         return _generate_value(dtype.pyarrow_dtype, fill_value)
 
+    if isinstance(dtype, pa.ListType):
+        return [_generate_value(dtype.value_type, fill_value)]
+
     if isinstance(dtype, pa.MapType):
         return [
             (
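This _generate_value branch is what the two new test_dtypes.py cases above exercise: for an Arrow list type, the fill value is generated for the element type and wrapped in a one-item list. A self-contained re-statement of the recursion (simplified; the real helper handles more dtypes):

import pyarrow as pa

def generate_value(dtype, fill_value):
    # Simplified sketch of maxframe's _generate_value recursion.
    if isinstance(dtype, pa.ListType):
        # New in 1.3.1: recurse into the element type.
        return [generate_value(dtype.value_type, fill_value)]
    if isinstance(dtype, pa.MapType):
        return [(generate_value(dtype.key_type, fill_value),
                 generate_value(dtype.item_type, fill_value))]
    if pa.types.is_string(dtype):
        return str(fill_value)
    return fill_value

assert generate_value(pa.list_(pa.string()), 1) == ["1"]  # matches the new test case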
@@ -28,6 +28,7 @@ from ...protocol import DataFrameTableMeta
 from ...tensor.core import TENSOR_TYPE
 
 _TEMP_TABLE_PREFIX = "tmp_mf_"
+DEFAULT_SINGLE_INDEX_NAME = "_idx_0"
 
 _arrow_to_odps_types = {
     pa.string(): odps_types.string,
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from . import graph, llm, pytorch
+from . import graph, llm, models, pytorch
 
-del graph
-del llm
-del pytorch
+del graph, llm, models, pytorch
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from . import models, multi_modal, text
 
 del models
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from typing import Any, Dict
 
 import numpy as np
@@ -19,6 +20,8 @@ import pandas as pd
 from ....core.entity.output_types import OutputType
 from ....core.operator.base import Operator
 from ....core.operator.core import TileableOperatorMixin
+from ....dataframe.core import SERIES_TYPE
+from ....dataframe.operators import DataFrameOperatorMixin
 from ....dataframe.utils import parse_index
 from ....serialization.serializables.core import Serializable
 from ....serialization.serializables.field import AnyField, DictField, StringField
@@ -31,24 +34,42 @@ class LLM(Serializable):
     pass
 
 
-class LLMOperator(Operator, TileableOperatorMixin):
+class LLMTaskOperator(Operator, DataFrameOperatorMixin):
+    task = AnyField("task", default=None)
     model = AnyField("model", default=None)
-    prompt_template = AnyField("prompt_template", default=None)
     params = DictField("params", default=None)
+    running_options: Dict[str, Any] = DictField("running_options", default=None)
 
     def __init__(self, output_types=None, **kw):
         if output_types is None:
             output_types = [OutputType.dataframe]
         super().__init__(_output_types=output_types, **kw)
 
-    def __call__(self, data):
-        col_names = ["response", "success"]
-        columns = parse_index(pd.Index(col_names), store_data=True)
-        out_dtypes = pd.Series([np.dtype("O"), np.dtype("bool")], index=col_names)
-        return self.new_tileable(
+    def get_output_dtypes(self) -> Dict[str, np.dtype]:
+        raise NotImplementedError
+
+    def __call__(self, data, index=None):
+        outputs = self.get_output_dtypes()
+        col_name = list(outputs.keys())
+        columns = parse_index(pd.Index(col_name), store_data=True)
+        out_dtypes = pd.Series(list(outputs.values()), index=col_name)
+        index_value = index or (
+            parse_index(pd.RangeIndex(-1), data)
+            if isinstance(data, SERIES_TYPE)
+            else data.index_value
+        )
+
+        return self.new_dataframe(
             inputs=[data],
-            dtypes=out_dtypes,
-            shape=(data.shape[0], len(col_names)),
-            index_value=data.index_value,
+            shape=(np.nan, len(col_name)),
+            index_value=index_value,
             columns_value=columns,
+            dtypes=out_dtypes,
         )
+
+
+class LLMTextGenOperator(LLMTaskOperator, TileableOperatorMixin):
+    prompt_template = AnyField("prompt_template", default=None)
+
+    def get_output_dtypes(self) -> Dict[str, np.dtype]:
+        return {"response": np.dtype("O"), "success": np.dtype("bool")}
@@ -11,4 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from .dashscope import DashScopeMultiModalLLM, DashScopeTextLLM
@@ -11,12 +11,13 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 from typing import Any, Dict
 
 from ..... import opcodes
 from .....serialization.serializables.core import Serializable
 from .....serialization.serializables.field import StringField
-from ..core import LLMOperator
+from ..core import LLMTextGenOperator
 from ..multi_modal import MultiModalLLM
 from ..text import TextLLM
 
@@ -33,8 +34,25 @@ class DashScopeLLMMixin(Serializable):
 
 
 class DashScopeTextLLM(TextLLM, DashScopeLLMMixin):
+    """
+    DashScope text LLM.
+    """
+
     api_key_resource = StringField("api_key_resource", default=None)
 
+    def __init__(self, name: str, api_key_resource: str):
+        """
+        Initialize a DashScope text LLM.
+
+        Parameters
+        ----------
+        name : str
+            The LLM name to use, check DashScope for `available models <https://help.aliyun.com/zh/model-studio/getting-started/models>`_.
+        api_key_resource : str
+            The MaxCompute resource file name containing the DashScope API key.
+        """
+        super().__init__(name=name, api_key_resource=api_key_resource)
+
     def generate(
         self,
         data,
@@ -49,8 +67,25 @@ class DashScopeTextLLM(TextLLM, DashScopeLLMMixin):
 
 
 class DashScopeMultiModalLLM(MultiModalLLM, DashScopeLLMMixin):
+    """
+    DashScope multi-modal LLM.
+    """
+
     api_key_resource = StringField("api_key_resource", default=None)
 
+    def __init__(self, name: str, api_key_resource: str):
+        """
+        Initialize a DashScope multi-modal LLM.
+
+        Parameters
+        ----------
+        name : str
+            The LLM name to use, check DashScope for `available models <https://help.aliyun.com/zh/model-studio/getting-started/models>`_.
+        api_key_resource : str
+            The MaxCompute resource file name containing the DashScope API key.
+        """
+        super().__init__(name=name, api_key_resource=api_key_resource)
+
     def generate(
         self,
         data,
@@ -65,9 +100,9 @@ class DashScopeMultiModalLLM(MultiModalLLM, DashScopeLLMMixin):
         )(data)
 
 
-class DashScopeTextGenerationOperator(LLMOperator):
+class DashScopeTextGenerationOperator(LLMTextGenOperator):
     _op_type_ = opcodes.DASHSCOPE_TEXT_GENERATION
 
 
-class DashScopeMultiModalGenerationOperator(LLMOperator):
+class DashScopeMultiModalGenerationOperator(LLMTextGenOperator):
     _op_type_ = opcodes.DASHSCOPE_MULTI_MODAL_GENERATION
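With the new explicit constructors, wiring up a DashScope-backed model is self-documenting. A hedged usage sketch (the model and resource names are placeholders, and since generate's full parameter list is truncated in this diff, the call shape is borrowed from the managed-LLM signature below):

import maxframe.dataframe as md
from maxframe.learn.contrib.llm.models import DashScopeTextLLM

llm = DashScopeTextLLM(
    name="qwen-plus",                  # placeholder model name
    api_key_resource="dashscope_key",  # placeholder MaxCompute resource file
)

df = md.DataFrame({"query": ["hello", "what is MaxFrame?"]})
# Assumed message-template format; verify against maxframe's LLM docs.
result = llm.generate(df, prompt_template=[{"role": "user", "content": "{query}"}])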
@@ -0,0 +1,54 @@
+# Copyright 1999-2025 Alibaba Group Holding Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, List
+
+from ..... import opcodes
+from .....serialization.serializables import StringField
+from ..core import LLMTextGenOperator
+from ..text import TextLLM
+
+
+class ManagedLLMTextGenOperator(LLMTextGenOperator):
+    _op_type_ = opcodes.MANAGED_TEXT_MODAL_GENERATION
+
+    inference_framework: str = StringField("inference_framework", default=None)
+
+
+class ManagedTextLLM(TextLLM):
+    """
+    Managed text LLM by MaxFrame.
+    """
+
+    def __init__(self, name: str):
+        """
+        Initialize a managed text LLM.
+
+        Parameters
+        ----------
+        name : str
+            The managed text LLM name to use.
+        """
+        super().__init__(name=name)
+
+    def generate(
+        self,
+        data,
+        prompt_template: List[Dict[str, Any]],
+        params: Dict[str, Any] = None,
+        **kw
+    ):
+        return ManagedLLMTextGenOperator(
+            model=self, prompt_template=prompt_template, params=params, **kw
+        )(data)
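
ManagedTextLLM rounds out the new model family: generation runs against a MaxFrame-managed model, routed through the new MANAGED_TEXT_MODAL_GENERATION opcode, with the generate(data, prompt_template, params, **kw) contract shown above. A brief usage sketch (the model name and template content are placeholders):

import maxframe.dataframe as md
from maxframe.learn.contrib.llm.models.managed import ManagedTextLLM  # path per this diff

llm = ManagedTextLLM(name="some-managed-model")  # placeholder name

df = md.DataFrame({"question": ["What is MaxCompute?"]})
result = llm.generate(
    df,
    prompt_template=[{"role": "user", "content": "{question}"}],  # assumed format
    params={"temperature": 0.7},  # assumed generation parameters
)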