maxframe 0.1.0b5__cp311-cp311-win_amd64.whl → 1.0.0rc2__cp311-cp311-win_amd64.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (92) hide show
  1. maxframe/_utils.cp311-win_amd64.pyd +0 -0
  2. maxframe/codegen.py +6 -2
  3. maxframe/config/config.py +38 -2
  4. maxframe/config/validators.py +1 -0
  5. maxframe/conftest.py +2 -0
  6. maxframe/core/__init__.py +0 -3
  7. maxframe/core/entity/__init__.py +1 -8
  8. maxframe/core/entity/objects.py +3 -45
  9. maxframe/core/graph/core.cp311-win_amd64.pyd +0 -0
  10. maxframe/core/graph/core.pyx +4 -4
  11. maxframe/dataframe/__init__.py +1 -1
  12. maxframe/dataframe/arithmetic/around.py +5 -17
  13. maxframe/dataframe/arithmetic/core.py +15 -7
  14. maxframe/dataframe/arithmetic/docstring.py +5 -55
  15. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
  16. maxframe/dataframe/core.py +5 -5
  17. maxframe/dataframe/datasource/date_range.py +2 -2
  18. maxframe/dataframe/datasource/read_odps_query.py +6 -0
  19. maxframe/dataframe/datasource/read_odps_table.py +2 -1
  20. maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
  21. maxframe/dataframe/datastore/tests/__init__.py +13 -0
  22. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  23. maxframe/dataframe/datastore/to_odps.py +21 -0
  24. maxframe/dataframe/groupby/cum.py +0 -1
  25. maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
  26. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  27. maxframe/dataframe/indexing/align.py +1 -1
  28. maxframe/dataframe/indexing/rename.py +3 -37
  29. maxframe/dataframe/indexing/sample.py +0 -1
  30. maxframe/dataframe/indexing/set_index.py +68 -1
  31. maxframe/dataframe/merge/merge.py +236 -2
  32. maxframe/dataframe/merge/tests/test_merge.py +123 -0
  33. maxframe/dataframe/misc/apply.py +5 -10
  34. maxframe/dataframe/misc/case_when.py +1 -1
  35. maxframe/dataframe/misc/describe.py +2 -2
  36. maxframe/dataframe/misc/drop_duplicates.py +4 -25
  37. maxframe/dataframe/misc/eval.py +4 -0
  38. maxframe/dataframe/misc/memory_usage.py +2 -2
  39. maxframe/dataframe/misc/pct_change.py +1 -83
  40. maxframe/dataframe/misc/tests/test_misc.py +23 -0
  41. maxframe/dataframe/misc/transform.py +1 -30
  42. maxframe/dataframe/misc/value_counts.py +4 -17
  43. maxframe/dataframe/missing/dropna.py +1 -1
  44. maxframe/dataframe/missing/fillna.py +5 -5
  45. maxframe/dataframe/sort/sort_values.py +1 -11
  46. maxframe/dataframe/statistics/corr.py +3 -3
  47. maxframe/dataframe/statistics/quantile.py +5 -17
  48. maxframe/dataframe/utils.py +4 -7
  49. maxframe/errors.py +13 -0
  50. maxframe/extension.py +12 -0
  51. maxframe/learn/contrib/xgboost/dmatrix.py +2 -2
  52. maxframe/learn/contrib/xgboost/predict.py +2 -2
  53. maxframe/learn/contrib/xgboost/train.py +2 -2
  54. maxframe/lib/mmh3.cp311-win_amd64.pyd +0 -0
  55. maxframe/lib/mmh3.pyi +43 -0
  56. maxframe/lib/wrapped_pickle.py +2 -1
  57. maxframe/odpsio/__init__.py +1 -1
  58. maxframe/odpsio/arrow.py +8 -4
  59. maxframe/odpsio/schema.py +10 -7
  60. maxframe/odpsio/tableio.py +388 -14
  61. maxframe/odpsio/tests/test_schema.py +16 -15
  62. maxframe/odpsio/tests/test_tableio.py +48 -21
  63. maxframe/protocol.py +148 -12
  64. maxframe/serialization/core.cp311-win_amd64.pyd +0 -0
  65. maxframe/serialization/core.pxd +3 -0
  66. maxframe/serialization/core.pyi +3 -0
  67. maxframe/serialization/core.pyx +54 -25
  68. maxframe/serialization/exception.py +1 -1
  69. maxframe/serialization/pandas.py +7 -2
  70. maxframe/serialization/serializables/core.py +158 -12
  71. maxframe/serialization/serializables/tests/test_serializable.py +46 -4
  72. maxframe/tensor/__init__.py +59 -0
  73. maxframe/tensor/arithmetic/tests/test_arithmetic.py +1 -1
  74. maxframe/tensor/base/atleast_1d.py +1 -1
  75. maxframe/tensor/base/unique.py +3 -3
  76. maxframe/tensor/reduction/count_nonzero.py +1 -1
  77. maxframe/tensor/statistics/quantile.py +2 -2
  78. maxframe/tests/test_protocol.py +34 -0
  79. maxframe/tests/test_utils.py +0 -12
  80. maxframe/tests/utils.py +11 -2
  81. maxframe/utils.py +24 -13
  82. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/METADATA +75 -2
  83. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/RECORD +91 -89
  84. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/WHEEL +1 -1
  85. maxframe_client/__init__.py +0 -1
  86. maxframe_client/fetcher.py +38 -27
  87. maxframe_client/session/odps.py +50 -10
  88. maxframe_client/session/task.py +41 -20
  89. maxframe_client/tests/test_fetcher.py +21 -3
  90. maxframe_client/tests/test_session.py +49 -2
  91. maxframe_client/clients/spe.py +0 -104
  92. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0rc2.dist-info}/top_level.txt +0 -0
@@ -26,7 +26,6 @@ import numpy as np
26
26
  import pandas as pd
27
27
  from pandas.api.extensions import ExtensionDtype
28
28
  from pandas.api.types import is_string_dtype
29
- from pandas.core.dtypes.cast import find_common_type
30
29
  from pandas.core.dtypes.inference import is_dict_like, is_list_like
31
30
 
32
31
  from ..core import Entity, ExecutableTuple
@@ -477,11 +476,11 @@ def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
477
476
  else:
478
477
  fill_values = fill_value
479
478
 
480
- from .core import SERIES_TYPE
479
+ from .core import INDEX_TYPE, SERIES_TYPE
481
480
 
482
481
  dtypes = (
483
482
  pd.Series([df_obj.dtype], index=[df_obj.name])
484
- if isinstance(df_obj, SERIES_TYPE)
483
+ if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE))
485
484
  else df_obj.dtypes
486
485
  )
487
486
  for size, fill_value in zip(sizes, fill_values):
@@ -593,7 +592,7 @@ def build_series(
593
592
  return ret_series
594
593
 
595
594
 
596
- def infer_index_value(left_index_value, right_index_value):
595
+ def infer_index_value(left_index_value, right_index_value, level=None):
597
596
  from .core import IndexValue
598
597
 
599
598
  if isinstance(left_index_value.value, IndexValue.RangeIndex) and isinstance(
@@ -616,9 +615,7 @@ def infer_index_value(left_index_value, right_index_value):
616
615
 
617
616
  left_index = left_index_value.to_pandas()
618
617
  right_index = right_index_value.to_pandas()
619
- out_index = pd.Index(
620
- [], dtype=find_common_type([left_index.dtype, right_index.dtype])
621
- )
618
+ out_index = left_index.join(right_index, level=level)[:0]
622
619
  return parse_index(out_index, left_index_value, right_index_value)
623
620
 
624
621
 
maxframe/errors.py CHANGED
@@ -17,5 +17,18 @@ class MaxFrameError(Exception):
17
17
  pass
18
18
 
19
19
 
20
+ class MaxFrameIntentionalError(MaxFrameError):
21
+ pass
22
+
23
+
20
24
  class MaxFrameUserError(MaxFrameError):
21
25
  pass
26
+
27
+
28
+ class NoTaskServerResponseError(MaxFrameError):
29
+ pass
30
+
31
+
32
+ class SessionAlreadyClosedError(MaxFrameError):
33
+ def __init__(self, session_id: str):
34
+ super().__init__(f"Session {session_id} is already closed")
maxframe/extension.py CHANGED
@@ -48,6 +48,18 @@ class MaxFrameExtension(metaclass=abc.ABCMeta):
48
48
  """
49
49
  pass
50
50
 
51
+ @classmethod
52
+ async def reload_session(cls, session_id: str) -> None:
53
+ """
54
+ Reload the session state when the session is recovered from failover.
55
+
56
+ Parameters
57
+ ----------
58
+ session_id : str
59
+ The session id.
60
+ """
61
+ pass
62
+
51
63
  @classmethod
52
64
  def init_service_extension(cls) -> None:
53
65
  """
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
 
16
- from .... import opcodes as OperandDef
16
+ from .... import opcodes
17
17
  from ....core.entity.output_types import get_output_types
18
18
  from ....core.operator.base import Operator
19
19
  from ....core.operator.core import TileableOperatorMixin
@@ -27,7 +27,7 @@ from ...utils import convert_to_tensor_or_dataframe
27
27
 
28
28
 
29
29
  class ToDMatrix(Operator, TileableOperatorMixin):
30
- _op_type_ = OperandDef.TO_DMATRIX
30
+ _op_type_ = opcodes.TO_DMATRIX
31
31
 
32
32
  data = KeyField("data", default=None)
33
33
  label = KeyField("label", default=None)
@@ -17,7 +17,7 @@ import pickle
17
17
  import numpy as np
18
18
  import pandas as pd
19
19
 
20
- from .... import opcodes as OperandDef
20
+ from .... import opcodes
21
21
  from ....core.entity.output_types import OutputType
22
22
  from ....core.operator.base import Operator
23
23
  from ....core.operator.core import TileableOperatorMixin
@@ -28,7 +28,7 @@ from .dmatrix import check_data
28
28
 
29
29
 
30
30
  class XGBPredict(Operator, TileableOperatorMixin):
31
- _op_type_ = OperandDef.XGBOOST_PREDICT
31
+ _op_type_ = opcodes.XGBOOST_PREDICT
32
32
  output_dtype = np.dtype(np.float32)
33
33
 
34
34
  data = KeyField("data", default=None)
@@ -15,7 +15,7 @@
15
15
  import logging
16
16
  from collections import OrderedDict
17
17
 
18
- from .... import opcodes as OperandDef
18
+ from .... import opcodes
19
19
  from ....core import OutputType
20
20
  from ....core.operator.base import Operator
21
21
  from ....core.operator.core import TileableOperatorMixin
@@ -41,7 +41,7 @@ def _on_serialize_evals(evals_val):
41
41
 
42
42
 
43
43
  class XGBTrain(Operator, TileableOperatorMixin):
44
- _op_type_ = OperandDef.XGBOOST_TRAIN
44
+ _op_type_ = opcodes.XGBOOST_TRAIN
45
45
 
46
46
  params = DictField("params", key_type=FieldTypes.string, default=None)
47
47
  dtrain = KeyField("dtrain", default=None)
Binary file
maxframe/lib/mmh3.pyi ADDED
@@ -0,0 +1,43 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Tuple
16
+
17
+ def hash(key, seed=0, signed=True) -> int:
18
+ """
19
+ Return a 32 bit integer.
20
+ """
21
+
22
+ def hash_from_buffer(key, seed=0, signed=True) -> int:
23
+ """
24
+ Return a 32 bit integer. Designed for large memory-views such as numpy arrays.
25
+ """
26
+
27
+ def hash64(key, seed=0, x64arch=True, signed=True) -> Tuple[int, int]:
28
+ """
29
+ Return a tuple of two 64 bit integers for a string. Optimized for
30
+ the x64 bit architecture when x64arch=True, otherwise for x86.
31
+ """
32
+
33
+ def hash128(key, seed=0, x64arch=True, signed=False) -> int:
34
+ """
35
+ Return a 128 bit long integer. Optimized for the x64 bit architecture
36
+ when x64arch=True, otherwise for x86.
37
+ """
38
+
39
+ def hash_bytes(key, seed=0, x64arch=True) -> bytes:
40
+ """
41
+ Return a 128 bit hash value as bytes for a string. Optimized for the
42
+ x64 bit architecture when x64arch=True, otherwise for the x86.
43
+ """
@@ -120,7 +120,8 @@ class _UnpickleSwitch:
120
120
  @functools.wraps(func)
121
121
  async def wrapped(*args, **kwargs):
122
122
  with _UnpickleSwitch(forbidden=self._forbidden):
123
- return await func(*args, **kwargs)
123
+ ret = await func(*args, **kwargs)
124
+ return ret
124
125
 
125
126
  else:
126
127
 
@@ -18,4 +18,4 @@ from .schema import (
18
18
  odps_schema_to_pandas_dtypes,
19
19
  pandas_to_odps_schema,
20
20
  )
21
- from .tableio import HaloTableIO
21
+ from .tableio import HaloTableIO, ODPSTableIO
maxframe/odpsio/arrow.py CHANGED
@@ -45,9 +45,13 @@ def _rebuild_dataframe(
45
45
 
46
46
  def _rebuild_index(df: pd.DataFrame, table_meta: DataFrameTableMeta) -> pd.Index:
47
47
  if df.shape[1] > 1:
48
- df.columns = pd.Index(table_meta.pd_index_level_names)
49
- return pd.MultiIndex.from_frame(df)
50
- return pd.Index(df.iloc[:, 0], name=table_meta.pd_index_level_names[0])
48
+ idx = pd.MultiIndex.from_frame(df)
49
+ idx.names = table_meta.pd_index_level_names
50
+ else:
51
+ # make sure even if None names are updated properly
52
+ idx = pd.Index(df.iloc[:, 0])
53
+ idx.name = table_meta.pd_index_level_names[0]
54
+ return idx
51
55
 
52
56
 
53
57
  def arrow_to_pandas(
@@ -75,7 +79,7 @@ def pandas_to_arrow(
75
79
  df.columns = pd.Index(table_meta.table_column_names)
76
80
  if not ignore_index:
77
81
  df = df.rename_axis(table_meta.table_index_column_names).reset_index()
78
- elif ignore_index:
82
+ elif ignore_index and table_meta.type != OutputType.index:
79
83
  df = pd.DataFrame([], columns=[])
80
84
  elif table_meta.type == OutputType.index:
81
85
  names = [f"_idx_{idx}" for idx in range(len(df.names))]
maxframe/odpsio/schema.py CHANGED
@@ -126,10 +126,15 @@ def odps_type_to_arrow_type(
126
126
  ]
127
127
  col_type = pa.struct(fields)
128
128
  elif isinstance(odps_type, odps_types.Decimal):
129
- col_type = pa.decimal128(
130
- odps_type.precision or odps_types.Decimal._max_precision,
131
- odps_type.scale or odps_types.Decimal._max_scale,
132
- )
129
+ if odps_type.name == "decimal":
130
+ # legacy decimal data without precision or scale
131
+ # precision data from internal compat mode
132
+ col_type = pa.decimal128(38, 18)
133
+ else:
134
+ col_type = pa.decimal128(
135
+ odps_type.precision or odps_types.Decimal._max_precision,
136
+ odps_type.scale or odps_types.Decimal._max_scale,
137
+ )
133
138
  elif isinstance(odps_type, (odps_types.Varchar, odps_types.Char)):
134
139
  col_type = pa.string()
135
140
  else:
@@ -289,8 +294,6 @@ def build_dataframe_table_meta(
289
294
  else: # pragma: no cover
290
295
  raise TypeError(f"Cannot accept type {type(df_obj)}")
291
296
 
292
- assert not ignore_index or obj_type in (OutputType.dataframe, OutputType.series)
293
-
294
297
  if obj_type == OutputType.scalar:
295
298
  pd_dtypes = pd.Series([])
296
299
  column_index_names = []
@@ -346,7 +349,7 @@ def build_dataframe_table_meta(
346
349
  else:
347
350
  index_dtypes = pd.Series([pd_index_val.dtype], index=pd_index_val.names)
348
351
 
349
- if ignore_index:
352
+ if ignore_index and obj_type != OutputType.index:
350
353
  table_index_column_names = []
351
354
  pd_index_dtypes = pd.Series([], index=[])
352
355
  else: