maxframe 1.2.0__cp39-cp39-macosx_10_9_universal2.whl → 1.3.0__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (72) hide show
  1. maxframe/_utils.cpython-39-darwin.so +0 -0
  2. maxframe/codegen.py +70 -21
  3. maxframe/config/config.py +6 -0
  4. maxframe/core/accessor.py +1 -0
  5. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  6. maxframe/dataframe/accessors/__init__.py +1 -1
  7. maxframe/dataframe/accessors/dict_/accessor.py +1 -0
  8. maxframe/dataframe/accessors/dict_/length.py +1 -0
  9. maxframe/dataframe/accessors/dict_/setitem.py +1 -0
  10. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +5 -7
  11. maxframe/dataframe/accessors/list_/__init__.py +37 -0
  12. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  13. maxframe/dataframe/accessors/list_/getitem.py +135 -0
  14. maxframe/dataframe/accessors/list_/length.py +73 -0
  15. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  16. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +79 -0
  17. maxframe/dataframe/accessors/plotting/__init__.py +2 -0
  18. maxframe/dataframe/accessors/string_/__init__.py +1 -0
  19. maxframe/dataframe/datasource/read_odps_query.py +1 -1
  20. maxframe/dataframe/datasource/tests/test_datasource.py +4 -0
  21. maxframe/dataframe/datastore/to_odps.py +6 -0
  22. maxframe/dataframe/extensions/accessor.py +1 -0
  23. maxframe/dataframe/extensions/apply_chunk.py +34 -21
  24. maxframe/dataframe/extensions/flatmap.py +8 -1
  25. maxframe/dataframe/extensions/tests/test_apply_chunk.py +2 -1
  26. maxframe/dataframe/extensions/tests/test_extensions.py +1 -0
  27. maxframe/dataframe/merge/concat.py +7 -4
  28. maxframe/dataframe/merge/merge.py +1 -0
  29. maxframe/dataframe/merge/tests/test_merge.py +97 -47
  30. maxframe/dataframe/missing/tests/test_missing.py +1 -0
  31. maxframe/dataframe/tests/test_utils.py +7 -0
  32. maxframe/dataframe/ufunc/ufunc.py +1 -0
  33. maxframe/dataframe/utils.py +3 -0
  34. maxframe/io/odpsio/schema.py +1 -0
  35. maxframe/learn/contrib/__init__.py +2 -4
  36. maxframe/learn/contrib/llm/__init__.py +1 -0
  37. maxframe/learn/contrib/llm/core.py +31 -10
  38. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  39. maxframe/learn/contrib/llm/models/dashscope.py +4 -3
  40. maxframe/learn/contrib/llm/models/managed.py +39 -0
  41. maxframe/learn/contrib/llm/multi_modal.py +1 -0
  42. maxframe/learn/contrib/llm/text.py +252 -8
  43. maxframe/learn/contrib/models.py +77 -0
  44. maxframe/learn/contrib/utils.py +1 -0
  45. maxframe/learn/contrib/xgboost/__init__.py +8 -1
  46. maxframe/learn/contrib/xgboost/classifier.py +15 -4
  47. maxframe/learn/contrib/xgboost/core.py +108 -1
  48. maxframe/learn/contrib/xgboost/dmatrix.py +1 -1
  49. maxframe/learn/contrib/xgboost/predict.py +8 -3
  50. maxframe/learn/contrib/xgboost/regressor.py +15 -1
  51. maxframe/learn/contrib/xgboost/train.py +5 -4
  52. maxframe/lib/dtypes_extension/__init__.py +2 -1
  53. maxframe/lib/dtypes_extension/dtypes.py +17 -42
  54. maxframe/lib/dtypes_extension/tests/test_dtypes.py +11 -31
  55. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  56. maxframe/opcodes.py +19 -0
  57. maxframe/serialization/__init__.py +1 -0
  58. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  59. maxframe/serialization/core.pyx +12 -1
  60. maxframe/serialization/numpy.py +12 -4
  61. maxframe/serialization/serializables/tests/test_serializable.py +13 -2
  62. maxframe/serialization/tests/test_serial.py +2 -0
  63. maxframe/tensor/merge/concatenate.py +1 -0
  64. maxframe/tensor/misc/unique.py +11 -10
  65. maxframe/tensor/reshape/reshape.py +4 -1
  66. maxframe/utils.py +4 -0
  67. {maxframe-1.2.0.dist-info → maxframe-1.3.0.dist-info}/METADATA +2 -2
  68. {maxframe-1.2.0.dist-info → maxframe-1.3.0.dist-info}/RECORD +72 -64
  69. {maxframe-1.2.0.dist-info → maxframe-1.3.0.dist-info}/WHEEL +1 -1
  70. maxframe_client/session/odps.py +3 -0
  71. maxframe_client/session/tests/test_task.py +1 -0
  72. {maxframe-1.2.0.dist-info → maxframe-1.3.0.dist-info}/top_level.txt +0 -0
Binary file
maxframe/codegen.py CHANGED
@@ -24,7 +24,7 @@ from odps.types import OdpsSchema
24
24
  from odps.utils import camel_to_underline
25
25
 
26
26
  from .core import OperatorType, Tileable, TileableGraph
27
- from .core.operator import Fetch
27
+ from .core.operator import Fetch, Operator
28
28
  from .extension import iter_extensions
29
29
  from .io.odpsio import build_dataframe_table_meta
30
30
  from .io.odpsio.schema import pandas_to_odps_schema
@@ -211,7 +211,21 @@ class BigDagCodeContext(metaclass=abc.ABCMeta):
211
211
  def get_udfs(self) -> List[AbstractUDF]:
212
212
  return list(self._udfs.values())
213
213
 
214
- def get_tileable_variable(self, tileable: Tileable) -> str:
214
+ def get_input_tileable_variable(self, tileable: Tileable) -> str:
215
+ """
216
+ Get or create the variable name for an input tileable. It should be used on the
217
+ RIGHT side of the assignment.
218
+ """
219
+ return self._get_tileable_variable(tileable)
220
+
221
+ def get_output_tileable_variable(self, tileable: Tileable) -> str:
222
+ """
223
+ Get or create the variable name for an output tileable. It should be used on the
224
+ LEFT side of the assignment.
225
+ """
226
+ return self._get_tileable_variable(tileable)
227
+
228
+ def _get_tileable_variable(self, tileable: Tileable) -> str:
215
229
  try:
216
230
  return self._tileable_key_to_variables[tileable.key]
217
231
  except KeyError:
@@ -315,7 +329,7 @@ class EngineAcceptance(Enum):
315
329
 
316
330
  class BigDagOperatorAdapter(metaclass=abc.ABCMeta):
317
331
  # todo handle refcount issue when generated code is being executed
318
- def accepts(self, op: OperatorType) -> EngineAcceptance:
332
+ def accepts(self, op: Operator) -> EngineAcceptance:
319
333
  return EngineAcceptance.ACCEPT
320
334
 
321
335
  @abc.abstractmethod
@@ -330,7 +344,7 @@ class BigDagOperatorAdapter(metaclass=abc.ABCMeta):
330
344
 
331
345
  Parameters
332
346
  ----------
333
- op : OperatorType
347
+ op : Operator
334
348
  The operator instance.
335
349
  context : BigDagCodeContext
336
350
  The BigDagCodeContext instance.
@@ -342,6 +356,48 @@ class BigDagOperatorAdapter(metaclass=abc.ABCMeta):
342
356
  """
343
357
  return list()
344
358
 
359
+ def generate_pre_op_code(
360
+ self, op: Operator, context: BigDagCodeContext
361
+ ) -> List[str]:
362
+ """
363
+ Generate the codes before actually handling the operator.
364
+ This method is usually implemented in the base class of each engine.
365
+
366
+ Parameters
367
+ ----------
368
+ op : Operator
369
+ The operator instance.
370
+ context : BigDagCodeContext
371
+ The BigDagCodeContext instance.
372
+
373
+ Returns
374
+ -------
375
+ result: List[str]
376
+ The code generated before the operator is actually handled, one list item per line.
377
+ """
378
+ return list()
379
+
380
+ def generate_post_op_code(
381
+ self, op: Operator, context: BigDagCodeContext
382
+ ) -> List[str]:
383
+ """
384
+ Generate the codes after actually handling the operator.
385
+ This method is usually implemented in the base class of each engine.
386
+
387
+ Parameters
388
+ ----------
389
+ op : Operator
390
+ The operator instance.
391
+ context : BigDagCodeContext
392
+ The BigDagCodeContext instance.
393
+
394
+ Returns
395
+ -------
396
+ result: List[str]
397
+ The code generated after the operator is actually handled, one list item per line.
398
+ """
399
+ return list()
400
+
345
401
 
346
402
  _engine_to_codegen: Dict[str, Type["BigDagCodeGenerator"]] = dict()
347
403
 
@@ -354,9 +410,6 @@ def register_engine_codegen(type_: Type["BigDagCodeGenerator"]):
354
410
  BUILTIN_ENGINE_SPE = "SPE"
355
411
  BUILTIN_ENGINE_MCSQL = "MCSQL"
356
412
 
357
- FAST_RANGE_INDEX_ENABLED = "codegen.fast_range_index_enabled"
358
- ROW_NUMBER_WINDOW_INDEX_ENABLED = "codegen.row_number_window_index_enabled"
359
-
360
413
 
361
414
  class BigDagCodeGenerator(metaclass=abc.ABCMeta):
362
415
  _context: BigDagCodeContext
@@ -364,11 +417,13 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
364
417
  engine_type: Optional[str] = None
365
418
  engine_priority: int = 0
366
419
  _extension_loaded = False
420
+ _generate_comments_enabled: bool = True
367
421
 
368
422
  def __init__(self, session_id: str, subdag_id: str = None):
369
423
  self._session_id = session_id
370
424
  self._subdag_id = subdag_id
371
425
  self._context = self._init_context(session_id, subdag_id)
426
+ self._generate_comments_enabled = True
372
427
 
373
428
  @classmethod
374
429
  def _load_engine_extensions(cls):
@@ -401,14 +456,6 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
401
456
  def _init_context(self, session_id: str, subdag_id: str) -> BigDagCodeContext:
402
457
  raise NotImplementedError
403
458
 
404
- def _generate_comments(
405
- self, op: OperatorType, adapter: BigDagOperatorAdapter
406
- ) -> List[str]:
407
- return adapter.generate_comment(op, self._context)
408
-
409
- def _generate_pre_op_code(self, op: OperatorType) -> List[str]:
410
- return []
411
-
412
459
  def _generate_delete_code(self, var_name: str) -> List[str]:
413
460
  return []
414
461
 
@@ -438,9 +485,11 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
438
485
  visited_op_key.add(op.key)
439
486
 
440
487
  adapter = self.get_op_adapter(type(op))()
441
- code_lines.extend(self._generate_pre_op_code(op))
442
- code_lines.extend(self._generate_comments(op, adapter))
488
+ code_lines.extend(adapter.generate_pre_op_code(op, self._context))
489
+ if self._generate_comments_enabled:
490
+ code_lines.extend(adapter.generate_comment(op, self._context))
443
491
  code_lines.extend(adapter.generate_code(op, self._context))
492
+ code_lines.extend(adapter.generate_post_op_code(op, self._context))
444
493
  code_lines.append("") # Append an empty line to separate operators
445
494
 
446
495
  # record refcounts
@@ -449,7 +498,7 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
449
498
  continue
450
499
  if dag.count_successors(out_t) == 0:
451
500
  delete_code = self._generate_delete_code(
452
- self._context.get_tileable_variable(out_t)
501
+ self._context.get_input_tileable_variable(out_t)
453
502
  )
454
503
  code_lines.extend(delete_code)
455
504
  else:
@@ -462,7 +511,7 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
462
511
  out_refcounts[inp_t.key] -= 1
463
512
  if out_refcounts[inp_t.key] == 0:
464
513
  delete_code = self._generate_delete_code(
465
- self._context.get_tileable_variable(inp_t)
514
+ self._context.get_input_tileable_variable(inp_t)
466
515
  )
467
516
  code_lines.extend(delete_code)
468
517
  out_refcounts.pop(inp_t.key)
@@ -475,11 +524,11 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
475
524
  for tileable in dag.topological_iter():
476
525
  op: OperatorType = tileable.op
477
526
  if isinstance(op, Fetch):
478
- fetch_tileable = self._context.get_tileable_variable(tileable)
527
+ fetch_tileable = self._context.get_input_tileable_variable(tileable)
479
528
  input_key_to_vars[op.outputs[0].key] = fetch_tileable
480
529
 
481
530
  result_variables = {
482
- t.key: self._context.get_tileable_variable(t) for t in dag.results
531
+ t.key: self._context.get_input_tileable_variable(t) for t in dag.results
483
532
  }
484
533
 
485
534
  return CodeGenResult(
maxframe/config/config.py CHANGED
@@ -407,6 +407,12 @@ default_options.register_option(
407
407
  validator=is_integer,
408
408
  remote=True,
409
409
  )
410
+ default_options.register_option(
411
+ "session.temp_table_properties",
412
+ None,
413
+ validator=is_null | is_dict,
414
+ remote=True,
415
+ )
410
416
  default_options.register_option(
411
417
  "session.auto_purge_temp_tables",
412
418
  False,
maxframe/core/accessor.py CHANGED
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  import functools
15
16
 
16
17
 
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from . import datetime_, dict_, plotting, string_
15
+ from . import datetime_, dict_, list_, plotting, string_
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  from typing import TYPE_CHECKING
15
16
 
16
17
  import pandas as pd
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  import pandas as pd
15
16
  import pyarrow as pa
16
17
 
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  from .... import opcodes
15
16
  from ....core.entity.output_types import OutputType
16
17
  from ....serialization.serializables.field import AnyField
@@ -11,6 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
14
15
  import numpy as np
15
16
  import pandas as pd
16
17
  import pyarrow as pa
@@ -25,6 +26,10 @@ from ..length import SeriesDictLengthOperator
25
26
  from ..remove import SeriesDictRemoveOperator
26
27
  from ..setitem import SeriesDictSetItemOperator
27
28
 
29
+ pytestmark = pytest.mark.skipif(
30
+ ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported"
31
+ )
32
+
28
33
 
29
34
  @pytest.fixture
30
35
  def df():
@@ -40,13 +45,11 @@ def df():
40
45
  )
41
46
 
42
47
 
43
- @pytest.mark.skipif(ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported")
44
48
  def test_invalid_dtype(df):
45
49
  with pytest.raises(AttributeError):
46
50
  df["C"].dict.contains("k1")
47
51
 
48
52
 
49
- @pytest.mark.skipif(ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported")
50
53
  def test_getitem(df):
51
54
  s1 = df["A"].dict["k1"]
52
55
  assert isinstance(s1, md.Series)
@@ -61,7 +64,6 @@ def test_getitem(df):
61
64
  assert op.ignore_key_error is False
62
65
 
63
66
 
64
- @pytest.mark.skipif(ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported")
65
67
  def test_getitem_with_default_value(df):
66
68
  s1 = df["B"].dict.get("k1", 1)
67
69
  assert isinstance(s1, md.Series)
@@ -76,7 +78,6 @@ def test_getitem_with_default_value(df):
76
78
  assert op.ignore_key_error is True
77
79
 
78
80
 
79
- @pytest.mark.skipif(ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported")
80
81
  def test_setitem(df):
81
82
  s1 = df["A"]
82
83
  s1.dict["k1"] = "v3"
@@ -91,7 +92,6 @@ def test_setitem(df):
91
92
  assert op.value == "v3"
92
93
 
93
94
 
94
- @pytest.mark.skipif(ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported")
95
95
  def test_length(df):
96
96
  s1 = df["A"].dict.len()
97
97
  assert isinstance(s1, md.Series)
@@ -103,7 +103,6 @@ def test_length(df):
103
103
  assert isinstance(op, SeriesDictLengthOperator)
104
104
 
105
105
 
106
- @pytest.mark.skipif(ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported")
107
106
  def test_remove(df):
108
107
  s1 = df["A"].dict.remove("k1", ignore_key_error=True)
109
108
  assert isinstance(s1, md.Series)
@@ -117,7 +116,6 @@ def test_remove(df):
117
116
  assert op.ignore_key_error is True
118
117
 
119
118
 
120
- @pytest.mark.skipif(ARROW_DTYPE_NOT_SUPPORTED, reason="Arrow Dtype is not supported")
121
119
  def test_contains(df):
122
120
  s1 = df["A"].dict.contains("k1")
123
121
  assert isinstance(s1, md.Series)
@@ -0,0 +1,37 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ def _install():
17
+ from ....core import CachedAccessor
18
+ from ...core import SERIES_TYPE
19
+ from .accessor import ListAccessor
20
+ from .getitem import series_list_getitem, series_list_getitem_with_index_error
21
+ from .length import series_list_length
22
+
23
+ dict_method_to_handlers = {
24
+ "__getitem__": series_list_getitem_with_index_error,
25
+ "get": series_list_getitem,
26
+ "len": series_list_length,
27
+ }
28
+
29
+ for name, handler in dict_method_to_handlers.items():
30
+ ListAccessor._register(name, handler)
31
+
32
+ for series in SERIES_TYPE:
33
+ series.list = CachedAccessor("list", ListAccessor)
34
+
35
+
36
+ _install()
37
+ del _install
@@ -0,0 +1,39 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import TYPE_CHECKING
16
+
17
+ import pandas as pd
18
+ import pyarrow as pa
19
+
20
+ from ....core import BaseMaxFrameAccessor
21
+ from ....utils import ARROW_DTYPE_NOT_SUPPORTED
22
+
23
+ if TYPE_CHECKING:
24
+ from ...core import Series
25
+
26
+
27
+ class ListAccessor(BaseMaxFrameAccessor):
28
+ obj: "Series"
29
+ _api_count: int = 0
30
+
31
+ def __init__(self, series):
32
+ super().__init__(series)
33
+ if ARROW_DTYPE_NOT_SUPPORTED:
34
+ raise ImportError("pd.ArrowDtype is not supported in current environment")
35
+
36
+ if not isinstance(series.dtype, pd.ArrowDtype) or not isinstance(
37
+ series.dtype.pyarrow_dtype, pa.ListType
38
+ ):
39
+ raise AttributeError("Can only use .list accessor with list values")
@@ -0,0 +1,135 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pandas as pd
16
+
17
+ from .... import opcodes
18
+ from ....core.entity.output_types import OutputType
19
+ from ....serialization.serializables.field import AnyField, BoolField
20
+ from ...operators import DataFrameOperator, DataFrameOperatorMixin
21
+
22
+
23
+ class SeriesListGetItemOperator(DataFrameOperator, DataFrameOperatorMixin):
24
+ _op_type_ = opcodes.SERIES_LIST_GETITEM
25
+ query_index = AnyField("query_index", default=None)
26
+ ignore_index_error = BoolField("ignore_index_error", default=False)
27
+
28
+ def __init__(self, **kw):
29
+ super().__init__(_output_types=[OutputType.series], **kw)
30
+
31
+ def __call__(self, series):
32
+ arrow_list_type = series.dtype.pyarrow_dtype
33
+ return self.new_series(
34
+ [series],
35
+ shape=series.shape,
36
+ dtype=pd.ArrowDtype(arrow_list_type.value_type),
37
+ index_value=series.index_value,
38
+ )
39
+
40
+
41
+ def series_list_getitem(series, query_index):
42
+ """
43
+ Get the value by the index of each list in the Series.
44
+
45
+ Parameters
46
+ ----------
47
+ query_index : Any
48
+ The index to look up in each list, must be an integer.
49
+
50
+ Returns
51
+ -------
52
+ Series :
53
+ A Series with the list value's data type. The value will be
54
+ ``None`` if the list is None.
55
+
56
+ Examples
57
+ --------
58
+ Create a series with list type data.
59
+
60
+ >>> import maxframe.dataframe as md
61
+ >>> import pyarrow as pa
62
+ >>> from maxframe.lib.dtypes_extension import list_
63
+ >>> s = md.Series(
64
+ ... data=[[1, 2, 3], [4, 5, 6], None],
65
+ ... index=[1, 2, 3],
66
+ ... dtype=list_(pa.int64()),
67
+ ... )
68
+ >>> s.execute()
69
+ 1 [1, 2, 3]
70
+ 2 [4, 5, 6]
71
+ 3 <NA>
72
+ dtype: list<int64>[pyarrow]
73
+
74
+ >>> s.list.get(0).execute()
75
+ 1 1
76
+ 2 4
77
+ 3 <NA>
78
+ dtype: int64[pyarrow]
79
+ """
80
+ return SeriesListGetItemOperator(query_index=query_index, ignore_index_error=True)(
81
+ series
82
+ )
83
+
84
+
85
+ def series_list_getitem_with_index_error(series, query_index):
86
+ """
87
+ Get the value by the index of each list in the Series. If the index
88
+ is not in the list, raise IndexError.
89
+
90
+ Parameters
91
+ ----------
92
+ query_index : Any
93
+ The index to check, must be integer.
94
+
95
+ Returns
96
+ -------
97
+ Series :
98
+ A Series with the list value's data type. Return ``None`` if the list is None.
99
+
100
+ Raises
101
+ ------
102
+ IndexError
103
+ If the index is out of range for any of the lists.
104
+
105
+ See Also
106
+ --------
107
+ Series.list.get: Get the value by the index of each list in the Series.
108
+
109
+ Examples
110
+ --------
111
+ Create a series with list type data.
112
+
113
+ >>> import maxframe.dataframe as md
114
+ >>> import pyarrow as pa
115
+ >>> from maxframe.lib.dtypes_extension import list_
116
+ >>> s = md.Series(
117
+ ... data=[[1, 2, 3], [4, 5, 6], None],
118
+ ... index=[1, 2, 3],
119
+ ... dtype=list_(pa.int64()),
120
+ ... )
121
+ >>> s.execute()
122
+ 1 [1, 2, 3]
123
+ 2 [4, 5, 6]
124
+ 3 <NA>
125
+ dtype: list<int64>[pyarrow]
126
+
127
+ >>> s.list[0].execute()
128
+ 1 1
129
+ 2 4
130
+ 3 <NA>
131
+ dtype: int64[pyarrow]
132
+ """
133
+ return SeriesListGetItemOperator(query_index=query_index, ignore_index_error=False)(
134
+ series
135
+ )
@@ -0,0 +1,73 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pandas as pd
16
+ import pyarrow as pa
17
+
18
+ from .... import opcodes
19
+ from ....core.entity.output_types import OutputType
20
+ from ...operators import DataFrameOperator, DataFrameOperatorMixin
21
+
22
+
23
+ class SeriesListLengthOperator(DataFrameOperator, DataFrameOperatorMixin):
24
+ _op_type_ = opcodes.SERIES_LIST_LENGTH
25
+
26
+ def __init__(self, **kw):
27
+ super().__init__(_output_types=[OutputType.series], **kw)
28
+
29
+ def __call__(self, series):
30
+ return self.new_series(
31
+ [series],
32
+ shape=series.shape,
33
+ index_value=series.index_value,
34
+ dtype=pd.ArrowDtype(pa.int64()),
35
+ name=None,
36
+ )
37
+
38
+
39
+ def series_list_length(series):
40
+ """
41
+ Get the length of each list of the Series.
42
+
43
+ Returns
44
+ -------
45
+ Series :
46
+ A Series with data type ``pandas.ArrowDtype(pyarrow.int64)``. Each element
47
+ represents the length of the list, or ``None`` if the list is ``None``.
48
+
49
+ Examples
50
+ --------
51
+ Create a series with list type data.
52
+
53
+ >>> import maxframe.dataframe as md
54
+ >>> import pyarrow as pa
55
+ >>> from maxframe.lib.dtypes_extension import list_
56
+ >>> s = md.Series(
57
+ ... data=[[1, 2, 3], [4, 5, 6], None],
58
+ ... index=[1, 2, 3],
59
+ ... dtype=list_(pa.int64()),
60
+ ... )
61
+ >>> s.execute()
62
+ 1 [1, 2, 3]
63
+ 2 [4, 5, 6]
64
+ 3 <NA>
65
+ dtype: list<int64>[pyarrow]
66
+
67
+ >>> s.list.len().execute()
68
+ 1 3
69
+ 2 3
70
+ 3 <NA>
71
+ dtype: int64[pyarrow]
72
+ """
73
+ return SeriesListLengthOperator()(series)
@@ -0,0 +1,13 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.