maxframe 0.1.0b4__cp311-cp311-win32.whl → 1.0.0__cp311-cp311-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (214) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp311-win32.pyd +0 -0
  3. maxframe/codegen.py +56 -5
  4. maxframe/config/config.py +78 -10
  5. maxframe/config/validators.py +42 -11
  6. maxframe/conftest.py +58 -14
  7. maxframe/core/__init__.py +2 -16
  8. maxframe/core/entity/__init__.py +1 -12
  9. maxframe/core/entity/executable.py +1 -1
  10. maxframe/core/entity/objects.py +46 -45
  11. maxframe/core/entity/output_types.py +0 -3
  12. maxframe/core/entity/tests/test_objects.py +43 -0
  13. maxframe/core/entity/tileables.py +5 -78
  14. maxframe/core/graph/__init__.py +2 -2
  15. maxframe/core/graph/builder/__init__.py +0 -1
  16. maxframe/core/graph/builder/base.py +5 -4
  17. maxframe/core/graph/builder/tileable.py +4 -4
  18. maxframe/core/graph/builder/utils.py +4 -8
  19. maxframe/core/graph/core.cp311-win32.pyd +0 -0
  20. maxframe/core/graph/core.pyx +4 -4
  21. maxframe/core/graph/entity.py +9 -33
  22. maxframe/core/operator/__init__.py +2 -9
  23. maxframe/core/operator/base.py +3 -5
  24. maxframe/core/operator/objects.py +0 -9
  25. maxframe/core/operator/utils.py +55 -0
  26. maxframe/dataframe/__init__.py +2 -1
  27. maxframe/dataframe/arithmetic/around.py +5 -17
  28. maxframe/dataframe/arithmetic/core.py +15 -7
  29. maxframe/dataframe/arithmetic/docstring.py +7 -33
  30. maxframe/dataframe/arithmetic/equal.py +4 -2
  31. maxframe/dataframe/arithmetic/greater.py +4 -2
  32. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  33. maxframe/dataframe/arithmetic/less.py +2 -2
  34. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  35. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  36. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
  37. maxframe/dataframe/core.py +58 -12
  38. maxframe/dataframe/datasource/date_range.py +2 -2
  39. maxframe/dataframe/datasource/read_odps_query.py +120 -24
  40. maxframe/dataframe/datasource/read_odps_table.py +9 -4
  41. maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
  42. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  43. maxframe/dataframe/datastore/to_odps.py +28 -0
  44. maxframe/dataframe/extensions/__init__.py +5 -0
  45. maxframe/dataframe/extensions/flatjson.py +131 -0
  46. maxframe/dataframe/extensions/flatmap.py +317 -0
  47. maxframe/dataframe/extensions/reshuffle.py +1 -1
  48. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  49. maxframe/dataframe/groupby/core.py +1 -1
  50. maxframe/dataframe/groupby/cum.py +0 -1
  51. maxframe/dataframe/groupby/fill.py +4 -1
  52. maxframe/dataframe/groupby/getitem.py +6 -0
  53. maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
  54. maxframe/dataframe/groupby/transform.py +5 -1
  55. maxframe/dataframe/indexing/align.py +1 -1
  56. maxframe/dataframe/indexing/loc.py +6 -4
  57. maxframe/dataframe/indexing/rename.py +5 -28
  58. maxframe/dataframe/indexing/sample.py +0 -1
  59. maxframe/dataframe/indexing/set_index.py +68 -1
  60. maxframe/dataframe/initializer.py +11 -1
  61. maxframe/dataframe/merge/__init__.py +9 -1
  62. maxframe/dataframe/merge/concat.py +41 -31
  63. maxframe/dataframe/merge/merge.py +237 -3
  64. maxframe/dataframe/merge/tests/test_merge.py +126 -1
  65. maxframe/dataframe/misc/__init__.py +4 -0
  66. maxframe/dataframe/misc/apply.py +6 -11
  67. maxframe/dataframe/misc/case_when.py +141 -0
  68. maxframe/dataframe/misc/describe.py +2 -2
  69. maxframe/dataframe/misc/drop_duplicates.py +8 -8
  70. maxframe/dataframe/misc/eval.py +4 -0
  71. maxframe/dataframe/misc/memory_usage.py +2 -2
  72. maxframe/dataframe/misc/pct_change.py +1 -83
  73. maxframe/dataframe/misc/pivot_table.py +262 -0
  74. maxframe/dataframe/misc/tests/test_misc.py +93 -1
  75. maxframe/dataframe/misc/transform.py +1 -30
  76. maxframe/dataframe/misc/value_counts.py +4 -17
  77. maxframe/dataframe/missing/dropna.py +1 -1
  78. maxframe/dataframe/missing/fillna.py +5 -5
  79. maxframe/dataframe/operators.py +1 -17
  80. maxframe/dataframe/plotting/core.py +2 -2
  81. maxframe/dataframe/reduction/core.py +4 -3
  82. maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
  83. maxframe/dataframe/sort/sort_values.py +1 -11
  84. maxframe/dataframe/statistics/corr.py +3 -3
  85. maxframe/dataframe/statistics/quantile.py +13 -19
  86. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  87. maxframe/dataframe/tests/test_initializer.py +33 -2
  88. maxframe/dataframe/utils.py +33 -11
  89. maxframe/dataframe/window/expanding.py +5 -3
  90. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  91. maxframe/errors.py +13 -0
  92. maxframe/extension.py +12 -0
  93. maxframe/io/__init__.py +13 -0
  94. maxframe/io/objects/__init__.py +24 -0
  95. maxframe/io/objects/core.py +140 -0
  96. maxframe/io/objects/tensor.py +76 -0
  97. maxframe/io/objects/tests/__init__.py +13 -0
  98. maxframe/io/objects/tests/test_object_io.py +97 -0
  99. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  100. maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
  101. maxframe/{odpsio → io/odpsio}/schema.py +38 -16
  102. maxframe/io/odpsio/tableio.py +719 -0
  103. maxframe/io/odpsio/tests/__init__.py +13 -0
  104. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
  105. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  106. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  107. maxframe/io/odpsio/volumeio.py +63 -0
  108. maxframe/learn/contrib/__init__.py +3 -1
  109. maxframe/learn/contrib/graph/__init__.py +15 -0
  110. maxframe/learn/contrib/graph/connected_components.py +215 -0
  111. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  112. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  113. maxframe/learn/contrib/llm/__init__.py +16 -0
  114. maxframe/learn/contrib/llm/core.py +54 -0
  115. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  116. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  117. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  118. maxframe/learn/contrib/llm/text.py +42 -0
  119. maxframe/learn/contrib/utils.py +52 -0
  120. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  121. maxframe/learn/contrib/xgboost/classifier.py +110 -0
  122. maxframe/learn/contrib/xgboost/core.py +241 -0
  123. maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
  124. maxframe/learn/contrib/xgboost/predict.py +121 -0
  125. maxframe/learn/contrib/xgboost/regressor.py +71 -0
  126. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  127. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  128. maxframe/learn/contrib/xgboost/train.py +132 -0
  129. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  130. maxframe/learn/utils/__init__.py +15 -0
  131. maxframe/learn/utils/core.py +29 -0
  132. maxframe/lib/mmh3.cp311-win32.pyd +0 -0
  133. maxframe/lib/mmh3.pyi +43 -0
  134. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  135. maxframe/lib/wrapped_pickle.py +2 -1
  136. maxframe/opcodes.py +11 -0
  137. maxframe/protocol.py +154 -27
  138. maxframe/remote/core.py +4 -8
  139. maxframe/serialization/__init__.py +1 -0
  140. maxframe/serialization/core.cp311-win32.pyd +0 -0
  141. maxframe/serialization/core.pxd +3 -0
  142. maxframe/serialization/core.pyi +64 -0
  143. maxframe/serialization/core.pyx +67 -26
  144. maxframe/serialization/exception.py +1 -1
  145. maxframe/serialization/pandas.py +52 -17
  146. maxframe/serialization/serializables/core.py +180 -15
  147. maxframe/serialization/serializables/field_type.py +4 -1
  148. maxframe/serialization/serializables/tests/test_serializable.py +54 -5
  149. maxframe/serialization/tests/test_serial.py +2 -1
  150. maxframe/session.py +37 -2
  151. maxframe/tensor/__init__.py +81 -2
  152. maxframe/tensor/arithmetic/isclose.py +1 -0
  153. maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
  154. maxframe/tensor/core.py +5 -136
  155. maxframe/tensor/datasource/array.py +7 -2
  156. maxframe/tensor/datasource/full.py +1 -1
  157. maxframe/tensor/datasource/scalar.py +1 -1
  158. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  159. maxframe/tensor/indexing/flatnonzero.py +1 -1
  160. maxframe/tensor/indexing/getitem.py +2 -0
  161. maxframe/tensor/merge/__init__.py +2 -0
  162. maxframe/tensor/merge/concatenate.py +101 -0
  163. maxframe/tensor/merge/tests/test_merge.py +30 -1
  164. maxframe/tensor/merge/vstack.py +74 -0
  165. maxframe/tensor/{base → misc}/__init__.py +4 -0
  166. maxframe/tensor/misc/atleast_1d.py +72 -0
  167. maxframe/tensor/misc/atleast_2d.py +70 -0
  168. maxframe/tensor/misc/atleast_3d.py +85 -0
  169. maxframe/tensor/misc/tests/__init__.py +13 -0
  170. maxframe/tensor/{base → misc}/transpose.py +22 -18
  171. maxframe/tensor/misc/unique.py +205 -0
  172. maxframe/tensor/operators.py +1 -7
  173. maxframe/tensor/random/core.py +1 -1
  174. maxframe/tensor/reduction/count_nonzero.py +2 -1
  175. maxframe/tensor/reduction/mean.py +1 -0
  176. maxframe/tensor/reduction/nanmean.py +1 -0
  177. maxframe/tensor/reduction/nanvar.py +2 -0
  178. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  179. maxframe/tensor/reduction/var.py +2 -0
  180. maxframe/tensor/statistics/quantile.py +2 -2
  181. maxframe/tensor/utils.py +2 -22
  182. maxframe/tests/test_protocol.py +34 -0
  183. maxframe/tests/test_utils.py +0 -12
  184. maxframe/tests/utils.py +17 -2
  185. maxframe/typing_.py +4 -1
  186. maxframe/udf.py +62 -3
  187. maxframe/utils.py +112 -86
  188. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
  189. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
  190. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
  191. maxframe_client/__init__.py +0 -1
  192. maxframe_client/clients/framedriver.py +4 -1
  193. maxframe_client/fetcher.py +123 -54
  194. maxframe_client/session/consts.py +3 -0
  195. maxframe_client/session/graph.py +8 -2
  196. maxframe_client/session/odps.py +223 -40
  197. maxframe_client/session/task.py +108 -80
  198. maxframe_client/tests/test_fetcher.py +21 -3
  199. maxframe_client/tests/test_session.py +136 -8
  200. maxframe/core/entity/chunks.py +0 -68
  201. maxframe/core/entity/fuse.py +0 -73
  202. maxframe/core/graph/builder/chunk.py +0 -430
  203. maxframe/odpsio/tableio.py +0 -300
  204. maxframe/odpsio/volumeio.py +0 -95
  205. maxframe_client/clients/spe.py +0 -104
  206. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  207. /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
  208. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  209. /maxframe/tensor/{base → misc}/astype.py +0 -0
  210. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  211. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  212. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  213. /maxframe/tensor/{base → misc}/where.py +0 -0
  214. {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
@@ -46,6 +46,7 @@ from .misc.cut import cut
46
46
  from .misc.eval import maxframe_eval as eval # pylint: disable=redefined-builtin
47
47
  from .misc.get_dummies import get_dummies
48
48
  from .misc.melt import melt
49
+ from .misc.pivot_table import pivot_table
49
50
  from .misc.qcut import qcut
50
51
  from .misc.to_numeric import to_numeric
51
52
  from .missing import isna, isnull, notna, notnull
@@ -53,7 +54,7 @@ from .reduction import CustomReduction, unique
53
54
  from .tseries.to_datetime import to_datetime
54
55
 
55
56
  try:
56
- from pandas import NA, Timestamp
57
+ from pandas import NA, NaT, Timestamp
57
58
  except ImportError: # pragma: no cover
58
59
  pass
59
60
 
@@ -43,20 +43,20 @@ def around(df, decimals=0, *args, **kwargs):
43
43
  return op(df)
44
44
 
45
45
 
46
+ # FIXME Series input of decimals not supported yet
46
47
  around.__frame_doc__ = """
47
48
  Round a DataFrame to a variable number of decimal places.
48
49
 
49
50
  Parameters
50
51
  ----------
51
- decimals : int, dict, Series
52
+ decimals : int, dict
52
53
  Number of decimal places to round each column to. If an int is
53
54
  given, round each column to the same number of places.
54
55
  Otherwise dict and Series round to variable numbers of places.
55
56
  Column names should be in the keys if `decimals` is a
56
- dict-like, or in the index if `decimals` is a Series. Any
57
- columns not included in `decimals` will be left as is. Elements
58
- of `decimals` which are not columns of the input will be
59
- ignored.
57
+ dict-like. Any columns not included in `decimals` will be left
58
+ as is. Elements of `decimals` which are not columns of the
59
+ input will be ignored.
60
60
  *args
61
61
  Additional keywords have no effect but might be accepted for
62
62
  compatibility with numpy.
@@ -107,18 +107,6 @@ places as value
107
107
  1 0.0 1.0
108
108
  2 0.7 0.0
109
109
  3 0.2 0.0
110
-
111
- Using a Series, the number of places for specific columns can be
112
- specified with the column names as index and the number of
113
- decimal places as value
114
-
115
- >>> decimals = md.Series([0, 1], index=['cats', 'dogs'])
116
- >>> df.round(decimals).execute()
117
- dogs cats
118
- 0 0.2 0.0
119
- 1 0.0 1.0
120
- 2 0.7 0.0
121
- 3 0.2 0.0
122
110
  """
123
111
  around.__series_doc__ = """
124
112
  Round each value in a Series to the given number of decimals.
@@ -39,7 +39,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
39
39
  raise NotImplementedError
40
40
 
41
41
  @classmethod
42
- def _calc_properties(cls, x1, x2=None, axis="columns"):
42
+ def _calc_properties(cls, x1, x2=None, axis="columns", level=None):
43
43
  if isinstance(x1, DATAFRAME_TYPE) and (
44
44
  x2 is None or pd.api.types.is_scalar(x2) or isinstance(x2, TENSOR_TYPE)
45
45
  ):
@@ -108,7 +108,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
108
108
  index = copy.copy(x1.index_value)
109
109
  index_shape = x1.shape[0]
110
110
  else:
111
- index = infer_index_value(x1.index_value, x2.index_value)
111
+ index = infer_index_value(
112
+ x1.index_value, x2.index_value, level=level
113
+ )
112
114
  if index.key == x1.index_value.key == x2.index_value.key and (
113
115
  not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
114
116
  ):
@@ -141,7 +143,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
141
143
  column_shape = len(dtypes)
142
144
  else: # pragma: no cover
143
145
  dtypes = x1.dtypes # FIXME
144
- columns = infer_index_value(x1.columns_value, x2.index_value)
146
+ columns = infer_index_value(
147
+ x1.columns_value, x2.index_value, level=level
148
+ )
145
149
  column_shape = np.nan
146
150
  else:
147
151
  assert axis == "index" or axis == 0
@@ -169,7 +173,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
169
173
  ],
170
174
  index=x1.dtypes.index,
171
175
  )
172
- index = infer_index_value(x1.index_value, x2.index_value)
176
+ index = infer_index_value(
177
+ x1.index_value, x2.index_value, level=level
178
+ )
173
179
  index_shape = np.nan
174
180
  return {
175
181
  "shape": (index_shape, column_shape),
@@ -187,7 +193,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
187
193
  index = copy.copy(x1.index_value)
188
194
  index_shape = x1.shape[0]
189
195
  else:
190
- index = infer_index_value(x1.index_value, x2.index_value)
196
+ index = infer_index_value(
197
+ x1.index_value, x2.index_value, level=level
198
+ )
191
199
  if index.key == x1.index_value.key == x2.index_value.key and (
192
200
  not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
193
201
  ):
@@ -237,14 +245,14 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
237
245
  self._check_inputs(x1, x2)
238
246
  if isinstance(x1, DATAFRAME_TYPE) or isinstance(x2, DATAFRAME_TYPE):
239
247
  df1, df2 = (x1, x2) if isinstance(x1, DATAFRAME_TYPE) else (x2, x1)
240
- kw = self._calc_properties(df1, df2, axis=self.axis)
248
+ kw = self._calc_properties(df1, df2, axis=self.axis, level=self.level)
241
249
  if not pd.api.types.is_scalar(df2):
242
250
  return self.new_dataframe([x1, x2], **kw)
243
251
  else:
244
252
  return self.new_dataframe([df1], **kw)
245
253
  if isinstance(x1, SERIES_TYPE) or isinstance(x2, SERIES_TYPE):
246
254
  s1, s2 = (x1, x2) if isinstance(x1, SERIES_TYPE) else (x2, x1)
247
- kw = self._calc_properties(s1, s2)
255
+ kw = self._calc_properties(s1, s2, level=self.level)
248
256
  if not pd.api.types.is_scalar(s2):
249
257
  return self.new_series([x1, x2], **kw)
250
258
  else:
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ # FIXME:https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/17
15
16
  _flex_doc_FRAME = """
16
17
  Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
17
18
  Equivalent to ``{equiv}``, but with support to substitute a fill_value
@@ -127,44 +128,15 @@ circle 0
127
128
  triangle 3
128
129
  rectangle 4
129
130
 
130
- >>> (df * other).execute()
131
- angles degrees
132
- circle 0 NaN
133
- triangle 9 NaN
134
- rectangle 16 NaN
135
-
136
131
  >>> df.mul(other, fill_value=0).execute()
137
132
  angles degrees
138
133
  circle 0 0.0
139
134
  triangle 9 0.0
140
135
  rectangle 16 0.0
141
136
 
142
- Divide by a MultiIndex by level.
143
-
144
- >>> df_multindex = md.DataFrame({{'angles': [0, 3, 4, 4, 5, 6],
145
- ... 'degrees': [360, 180, 360, 360, 540, 720]}},
146
- ... index=[['A', 'A', 'A', 'B', 'B', 'B'],
147
- ... ['circle', 'triangle', 'rectangle',
148
- ... 'square', 'pentagon', 'hexagon']])
149
- >>> df_multindex.execute()
150
- angles degrees
151
- A circle 0 360
152
- triangle 3 180
153
- rectangle 4 360
154
- B square 4 360
155
- pentagon 5 540
156
- hexagon 6 720
157
-
158
- >>> df.div(df_multindex, level=1, fill_value=0).execute()
159
- angles degrees
160
- A circle NaN 1.0
161
- triangle 1.0 1.0
162
- rectangle 1.0 1.0
163
- B square 0.0 0.0
164
- pentagon 0.0 0.0
165
- hexagon 0.0 0.0
166
137
  """
167
138
 
139
+ # FIXME:https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/28
168
140
  _flex_doc_SERIES = """
169
141
  Return {desc} of series and other, element-wise (binary operator `{op_name}`).
170
142
 
@@ -257,7 +229,8 @@ Mismatched indices will be unioned together.
257
229
 
258
230
  Examples
259
231
  --------
260
- >>> df = pd.DataFrame({{'cost': [250, 150, 100],
232
+ >>> import maxframe.dataframe as md
233
+ >>> df = md.DataFrame({{'cost': [250, 150, 100],
261
234
  ... 'revenue': [100, 250, 300]}},
262
235
  ... index=['A', 'B', 'C'])
263
236
  >>> df.execute()
@@ -317,7 +290,7 @@ C True False
317
290
 
318
291
  Compare to a DataFrame of different shape.
319
292
 
320
- >>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}},
293
+ >>> other = md.DataFrame({{'revenue': [300, 250, 100, 150]}},
321
294
  ... index=['A', 'B', 'C', 'D'])
322
295
  >>> other.execute()
323
296
  revenue
@@ -335,7 +308,7 @@ D False False
335
308
 
336
309
  Compare to a MultiIndex by level.
337
310
 
338
- >>> df_multindex = pd.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
311
+ >>> df_multindex = md.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
339
312
  ... 'revenue': [100, 250, 300, 200, 175, 225]}},
340
313
  ... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
341
314
  ... ['A', 'B', 'C', 'A', 'B', 'C']])
@@ -356,6 +329,7 @@ Q1 A True True
356
329
  Q2 A False True
357
330
  B True False
358
331
  C True False
332
+
359
333
  """
360
334
 
361
335
 
@@ -51,6 +51,8 @@ dtype: bool
51
51
 
52
52
 
53
53
  @bin_compare_doc("Equal to", equiv="==", series_example=_eq_example)
54
- def eq(df, other, axis="columns", level=None):
55
- op = DataFrameEqual(axis=axis, level=level, lhs=df, rhs=other)
54
+ def eq(df, other, axis="columns", level=None, fill_value=None):
55
+ op = DataFrameEqual(
56
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
57
+ )
56
58
  return op(df, other)
@@ -52,6 +52,8 @@ dtype: bool
52
52
 
53
53
 
54
54
  @bin_compare_doc("Greater than", equiv=">", series_example=_gt_example)
55
- def gt(df, other, axis="columns", level=None):
56
- op = DataFrameGreater(axis=axis, level=level, lhs=df, rhs=other)
55
+ def gt(df, other, axis="columns", level=None, fill_value=None):
56
+ op = DataFrameGreater(
57
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
58
+ )
57
59
  return op(df, other)
@@ -52,6 +52,8 @@ dtype: bool
52
52
 
53
53
 
54
54
  @bin_compare_doc("Greater than or equal to", equiv=">=", series_example=_ge_example)
55
- def ge(df, other, axis="columns", level=None):
56
- op = DataFrameGreaterEqual(axis=axis, level=level, lhs=df, rhs=other)
55
+ def ge(df, other, axis="columns", level=None, fill_value=None):
56
+ op = DataFrameGreaterEqual(
57
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
58
+ )
57
59
  return op(df, other)
@@ -52,6 +52,6 @@ dtype: bool
52
52
 
53
53
 
54
54
  @bin_compare_doc("Less than", equiv="<", series_example=_lt_example)
55
- def lt(df, other, axis="columns", level=None):
56
- op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other)
55
+ def lt(df, other, axis="columns", level=None, fill_value=None):
56
+ op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value)
57
57
  return op(df, other)
@@ -52,6 +52,8 @@ dtype: bool
52
52
 
53
53
 
54
54
  @bin_compare_doc("Less than or equal to", equiv="<=", series_example=_le_example)
55
- def le(df, other, axis="columns", level=None):
56
- op = DataFrameLessEqual(axis=axis, level=level, lhs=df, rhs=other)
55
+ def le(df, other, axis="columns", level=None, fill_value=None):
56
+ op = DataFrameLessEqual(
57
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
58
+ )
57
59
  return op(df, other)
@@ -51,6 +51,8 @@ dtype: bool
51
51
 
52
52
 
53
53
  @bin_compare_doc("Not equal to", equiv="!=", series_example=_ne_example)
54
- def ne(df, other, axis="columns", level=None):
55
- op = DataFrameNotEqual(axis=axis, level=level, lhs=df, rhs=other)
54
+ def ne(df, other, axis="columns", level=None, fill_value=None):
55
+ op = DataFrameNotEqual(
56
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
57
+ )
56
58
  return op(df, other)
@@ -22,6 +22,7 @@ import pandas as pd
22
22
  import pytest
23
23
 
24
24
  from ....core import OperatorType
25
+ from ....tests.utils import assert_mf_index_dtype
25
26
  from ....utils import dataslots
26
27
  from ...core import IndexValue
27
28
  from ...datasource.dataframe import from_pandas
@@ -164,7 +165,7 @@ def test_without_shuffle(func_name, func_opts):
164
165
  pd.testing.assert_index_equal(
165
166
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
166
167
  )
167
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
168
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
168
169
  pd.testing.assert_index_equal(
169
170
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
170
171
  )
@@ -176,7 +177,7 @@ def test_without_shuffle(func_name, func_opts):
176
177
  pd.testing.assert_index_equal(
177
178
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
178
179
  )
179
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
180
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
180
181
  pd.testing.assert_index_equal(
181
182
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
182
183
  )
@@ -239,6 +240,28 @@ def test_dataframe_and_series_with_shuffle(func_name, func_opts):
239
240
  assert df2.columns_value.key != df1.columns_value.key
240
241
 
241
242
 
243
+ @pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
244
+ def test_dataframe_and_series_with_multiindex(func_name, func_opts):
245
+ data1 = pd.DataFrame(
246
+ np.random.rand(10, 10),
247
+ index=pd.MultiIndex.from_arrays(
248
+ [list("AAAAABBBBB"), [4, 9, 3, 2, 1, 5, 8, 6, 7, 10]]
249
+ ),
250
+ columns=[4, 1, 3, 2, 10, 5, 9, 8, 6, 7],
251
+ )
252
+ data1 = to_boolean_if_needed(func_opts.func_name, data1)
253
+ df1 = from_pandas(data1, chunk_size=5)
254
+ s1 = from_pandas_series(data1[10].reset_index(level=0, drop=True), chunk_size=6)
255
+
256
+ df2 = getattr(df1, func_opts.func_name)(s1, level=1, axis=0)
257
+
258
+ # test df2's index and columns
259
+ assert df2.shape == (np.nan, df1.shape[1])
260
+ assert df2.index_value.key != df1.index_value.key
261
+ assert df2.index_value.names == df1.index_value.names
262
+ assert df2.columns_value.key == df1.columns_value.key
263
+
264
+
242
265
  @pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
243
266
  def test_series_and_series_with_align_map(func_name, func_opts):
244
267
  data1 = pd.DataFrame(
@@ -348,7 +371,7 @@ def test_with_one_shuffle(func_name, func_opts):
348
371
  pd.testing.assert_index_equal(
349
372
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
350
373
  )
351
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
374
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
352
375
  pd.testing.assert_index_equal(
353
376
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
354
377
  )
@@ -381,7 +404,7 @@ def test_with_all_shuffle(func_name, func_opts):
381
404
  pd.testing.assert_index_equal(
382
405
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
383
406
  )
384
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
407
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
385
408
  pd.testing.assert_index_equal(
386
409
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
387
410
  )
@@ -411,7 +434,7 @@ def test_with_all_shuffle(func_name, func_opts):
411
434
  pd.testing.assert_index_equal(
412
435
  df6.columns_value.to_pandas(), func_opts.func(data4, data5).columns
413
436
  )
414
- assert isinstance(df6.index_value.value, IndexValue.Int64Index)
437
+ assert_mf_index_dtype(df6.index_value.value, np.int64)
415
438
  pd.testing.assert_index_equal(
416
439
  df6.index_value.to_pandas(), pd.Index([], dtype=np.int64)
417
440
  )
@@ -446,7 +469,7 @@ def test_without_shuffle_and_with_one_chunk(func_name, func_opts):
446
469
  pd.testing.assert_index_equal(
447
470
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
448
471
  )
449
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
472
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
450
473
  pd.testing.assert_index_equal(
451
474
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
452
475
  )
@@ -479,7 +502,7 @@ def test_both_one_chunk(func_name, func_opts):
479
502
  pd.testing.assert_index_equal(
480
503
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
481
504
  )
482
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
505
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
483
506
  pd.testing.assert_index_equal(
484
507
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
485
508
  )
@@ -512,7 +535,7 @@ def test_with_shuffle_and_one_chunk(func_name, func_opts):
512
535
  pd.testing.assert_index_equal(
513
536
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
514
537
  )
515
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
538
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
516
539
  pd.testing.assert_index_equal(
517
540
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
518
541
  )
@@ -536,7 +559,7 @@ def test_on_same_dataframe(func_name, func_opts):
536
559
  pd.testing.assert_index_equal(
537
560
  df2.columns_value.to_pandas(), func_opts.func(data, data).columns
538
561
  )
539
- assert isinstance(df2.index_value.value, IndexValue.Int64Index)
562
+ assert_mf_index_dtype(df2.index_value.value, np.int64)
540
563
  pd.testing.assert_index_equal(
541
564
  df2.index_value.to_pandas(), pd.Index([], dtype=np.int64)
542
565
  )
@@ -568,19 +591,19 @@ def test_dataframe_and_scalar(func_name, func_opts):
568
591
  pd.testing.assert_series_equal(result.dtypes, expected.dtypes)
569
592
 
570
593
  pd.testing.assert_index_equal(result.columns_value.to_pandas(), data.columns)
571
- assert isinstance(result.index_value.value, IndexValue.Int64Index)
594
+ assert_mf_index_dtype(result.index_value.value, np.int64)
572
595
 
573
596
  pd.testing.assert_index_equal(result2.columns_value.to_pandas(), data.columns)
574
- assert isinstance(result2.index_value.value, IndexValue.Int64Index)
597
+ assert_mf_index_dtype(result2.index_value.value, np.int64)
575
598
 
576
599
  pd.testing.assert_index_equal(result3.columns_value.to_pandas(), data.columns)
577
- assert isinstance(result3.index_value.value, IndexValue.Int64Index)
600
+ assert_mf_index_dtype(result3.index_value.value, np.int64)
578
601
 
579
602
  pd.testing.assert_index_equal(result4.columns_value.to_pandas(), data.columns)
580
- assert isinstance(result4.index_value.value, IndexValue.Int64Index)
603
+ assert_mf_index_dtype(result4.index_value.value, np.int64)
581
604
 
582
605
  pd.testing.assert_index_equal(result5.columns_value.to_pandas(), data.columns)
583
- assert isinstance(result5.index_value.value, IndexValue.Int64Index)
606
+ assert_mf_index_dtype(result5.index_value.value, np.int64)
584
607
 
585
608
  if "builtin_function_or_method" not in str(type(func_opts.func)):
586
609
  # skip NotImplemented test for comparison function
@@ -657,7 +680,7 @@ def test_abs():
657
680
  pd.testing.assert_index_equal(
658
681
  df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
659
682
  )
660
- assert isinstance(df2.index_value.value, IndexValue.Int64Index)
683
+ assert_mf_index_dtype(df2.index_value.value, np.int64)
661
684
  assert df2.shape == (10, 10)
662
685
 
663
686
 
@@ -675,7 +698,7 @@ def test_not():
675
698
  pd.testing.assert_index_equal(
676
699
  df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
677
700
  )
678
- assert isinstance(df2.index_value.value, IndexValue.Int64Index)
701
+ assert_mf_index_dtype(df2.index_value.value, np.int64)
679
702
  assert df2.shape == (10, 10)
680
703
 
681
704
 
@@ -35,6 +35,7 @@ from ..core import (
35
35
  register_output_types,
36
36
  )
37
37
  from ..core.entity.utils import refresh_tileable_shape
38
+ from ..protocol import DataFrameTableMeta
38
39
  from ..serialization.serializables import (
39
40
  AnyField,
40
41
  BoolField,
@@ -59,7 +60,13 @@ from ..utils import (
59
60
  on_serialize_numpy_type,
60
61
  tokenize,
61
62
  )
62
- from .utils import ReprSeries, fetch_corner_data, merge_index_value, parse_index
63
+ from .utils import (
64
+ ReprSeries,
65
+ apply_if_callable,
66
+ fetch_corner_data,
67
+ merge_index_value,
68
+ parse_index,
69
+ )
63
70
 
64
71
 
65
72
  class IndexValue(Serializable):
@@ -135,6 +142,14 @@ class IndexValue(Serializable):
135
142
  _data = NDArrayField("data")
136
143
  _dtype = DataTypeField("dtype")
137
144
 
145
+ @property
146
+ def dtype(self):
147
+ return getattr(self, "_dtype", None)
148
+
149
+ @property
150
+ def inferred_type(self):
151
+ return "floating" if self.dtype.kind == "f" else "integer"
152
+
138
153
  class RangeIndex(IndexBase):
139
154
  _name = AnyField("name")
140
155
  _slice = SliceField("slice")
@@ -236,6 +251,10 @@ class IndexValue(Serializable):
236
251
  _data = NDArrayField("data")
237
252
  _dtype = DataTypeField("dtype")
238
253
 
254
+ @property
255
+ def dtype(self):
256
+ return getattr(self, "_dtype", None)
257
+
239
258
  @property
240
259
  def inferred_type(self):
241
260
  return "integer"
@@ -247,6 +266,10 @@ class IndexValue(Serializable):
247
266
  _data = NDArrayField("data")
248
267
  _dtype = DataTypeField("dtype")
249
268
 
269
+ @property
270
+ def dtype(self):
271
+ return getattr(self, "_dtype", None)
272
+
250
273
  @property
251
274
  def inferred_type(self):
252
275
  return "integer"
@@ -258,6 +281,10 @@ class IndexValue(Serializable):
258
281
  _data = NDArrayField("data")
259
282
  _dtype = DataTypeField("dtype")
260
283
 
284
+ @property
285
+ def dtype(self):
286
+ return getattr(self, "_dtype", None)
287
+
261
288
  @property
262
289
  def inferred_type(self):
263
290
  return "floating"
@@ -616,6 +643,9 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
616
643
  if self._name is None:
617
644
  self._name = self.chunks[0].name
618
645
 
646
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
647
+ pass
648
+
619
649
  def _to_str(self, representation=False):
620
650
  if is_build_mode() or len(self._executed_sessions) == 0:
621
651
  # in build mode, or not executed, just return representation
@@ -945,6 +975,9 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
945
975
  if self._name is None:
946
976
  self._name = self.chunks[0].name
947
977
 
978
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
979
+ pass
980
+
948
981
  def _to_str(self, representation=False):
949
982
  if is_build_mode() or len(self._executed_sessions) == 0:
950
983
  # in build mode, or not executed, just return representation
@@ -978,7 +1011,7 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
978
1011
  return self._to_str(representation=False)
979
1012
 
980
1013
  def __repr__(self):
981
- return self._to_str(representation=False)
1014
+ return self._to_str(representation=True)
982
1015
 
983
1016
  @property
984
1017
  def dtype(self):
@@ -1073,11 +1106,11 @@ class Series(HasShapeTileable, _ToPandasMixin):
1073
1106
  --------
1074
1107
  >>> import maxframe.dataframe as md
1075
1108
  >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
1076
- >>> s.ndim.execute()
1109
+ >>> s.ndim
1077
1110
  1
1078
1111
 
1079
1112
  >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
1080
- >>> df.ndim.execute()
1113
+ >>> df.ndim
1081
1114
  2
1082
1115
  """
1083
1116
  return super().ndim
@@ -1501,6 +1534,17 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
1501
1534
  refresh_index_value(self)
1502
1535
  refresh_dtypes(self)
1503
1536
 
1537
+ def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
1538
+ self._dtypes = dtypes
1539
+ self._columns_value = parse_index(dtypes.index, store_data=True)
1540
+ self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
1541
+ new_shape = list(self._shape)
1542
+ new_shape[-1] = len(dtypes)
1543
+ self._shape = tuple(new_shape)
1544
+
1545
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
1546
+ self.refresh_from_dtypes(table_meta.pd_column_dtypes)
1547
+
1504
1548
  @property
1505
1549
  def dtypes(self):
1506
1550
  dt = getattr(self, "_dtypes", None)
@@ -1644,6 +1688,8 @@ class DataFrameData(_BatchedFetcher, BaseDataFrameData):
1644
1688
  raise NotImplementedError
1645
1689
 
1646
1690
  corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
1691
+ if corner_data is None:
1692
+ return
1647
1693
 
1648
1694
  buf = StringIO()
1649
1695
  max_rows = pd.get_option("display.max_rows")
@@ -1739,11 +1785,11 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
1739
1785
  --------
1740
1786
  >>> import maxframe.dataframe as md
1741
1787
  >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
1742
- >>> s.ndim.execute()
1788
+ >>> s.ndim
1743
1789
  1
1744
1790
 
1745
1791
  >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
1746
- >>> df.ndim.execute()
1792
+ >>> df.ndim
1747
1793
  2
1748
1794
  """
1749
1795
  return super().ndim
@@ -1997,12 +2043,6 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
1997
2043
  Berkeley 25.0 77.0 298.15
1998
2044
  """
1999
2045
 
2000
- def apply_if_callable(maybe_callable, obj, **kwargs):
2001
- if callable(maybe_callable):
2002
- return maybe_callable(obj, **kwargs)
2003
-
2004
- return maybe_callable
2005
-
2006
2046
  data = self.copy()
2007
2047
 
2008
2048
  for k, v in kwargs.items():
@@ -2197,6 +2237,9 @@ class CategoricalData(HasShapeTileableData, _ToPandasMixin):
2197
2237
  pd.Categorical(categories).categories, store_data=True
2198
2238
  )
2199
2239
 
2240
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
2241
+ pass
2242
+
2200
2243
  def _to_str(self, representation=False):
2201
2244
  if is_build_mode() or len(self._executed_sessions) == 0:
2202
2245
  # in build mode, or not executed, just return representation
@@ -2347,6 +2390,9 @@ class DataFrameOrSeriesData(HasShapeTileableData, _ToPandasMixin):
2347
2390
  data_params["name"] = self.chunks[0].name
2348
2391
  self._data_params.update(data_params)
2349
2392
 
2393
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
2394
+ pass
2395
+
2350
2396
  def ensure_data(self):
2351
2397
  from .fetch.core import DataFrameFetch
2352
2398
 
@@ -22,7 +22,7 @@ from pandas._libs.tslibs import timezones
22
22
  from pandas.tseries.frequencies import to_offset
23
23
  from pandas.tseries.offsets import Tick
24
24
 
25
- from ... import opcodes as OperandDef
25
+ from ... import opcodes
26
26
  from ...core import OutputType
27
27
  from ...serialization.serializables import AnyField, BoolField, Int64Field, StringField
28
28
  from ...utils import no_default, pd_release_version
@@ -117,7 +117,7 @@ def generate_range_count(
117
117
 
118
118
 
119
119
  class DataFrameDateRange(DataFrameOperator, DataFrameOperatorMixin):
120
- _op_type_ = OperandDef.DATE_RANGE
120
+ _op_type_ = opcodes.DATE_RANGE
121
121
 
122
122
  start = AnyField("start")
123
123
  end = AnyField("end")