maxframe 1.0.0rc1-cp37-cp37m-win_amd64.whl → 1.0.0rc3-cp37-cp37m-win_amd64.whl

This diff compares publicly available package versions as released to their public registries and is provided for informational purposes only.

Potentially problematic release: this version of maxframe might be problematic.
Files changed (138)
  1. maxframe/_utils.cp37-win_amd64.pyd +0 -0
  2. maxframe/codegen.py +3 -6
  3. maxframe/config/config.py +49 -10
  4. maxframe/config/validators.py +42 -11
  5. maxframe/conftest.py +15 -2
  6. maxframe/core/__init__.py +2 -13
  7. maxframe/core/entity/__init__.py +0 -4
  8. maxframe/core/entity/objects.py +46 -3
  9. maxframe/core/entity/output_types.py +0 -3
  10. maxframe/core/entity/tests/test_objects.py +43 -0
  11. maxframe/core/entity/tileables.py +5 -78
  12. maxframe/core/graph/__init__.py +2 -2
  13. maxframe/core/graph/builder/__init__.py +0 -1
  14. maxframe/core/graph/builder/base.py +5 -4
  15. maxframe/core/graph/builder/tileable.py +4 -4
  16. maxframe/core/graph/builder/utils.py +4 -8
  17. maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
  18. maxframe/core/graph/entity.py +9 -33
  19. maxframe/core/operator/__init__.py +2 -9
  20. maxframe/core/operator/base.py +3 -5
  21. maxframe/core/operator/objects.py +0 -9
  22. maxframe/core/operator/utils.py +55 -0
  23. maxframe/dataframe/__init__.py +1 -1
  24. maxframe/dataframe/arithmetic/around.py +5 -17
  25. maxframe/dataframe/arithmetic/core.py +15 -7
  26. maxframe/dataframe/arithmetic/docstring.py +5 -55
  27. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +22 -0
  28. maxframe/dataframe/core.py +5 -5
  29. maxframe/dataframe/datasource/date_range.py +2 -2
  30. maxframe/dataframe/datasource/read_odps_query.py +7 -1
  31. maxframe/dataframe/datasource/read_odps_table.py +3 -2
  32. maxframe/dataframe/datasource/tests/test_datasource.py +14 -0
  33. maxframe/dataframe/datastore/to_odps.py +1 -1
  34. maxframe/dataframe/groupby/cum.py +0 -1
  35. maxframe/dataframe/groupby/tests/test_groupby.py +4 -0
  36. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  37. maxframe/dataframe/indexing/rename.py +3 -37
  38. maxframe/dataframe/indexing/sample.py +0 -1
  39. maxframe/dataframe/indexing/set_index.py +68 -1
  40. maxframe/dataframe/merge/merge.py +236 -2
  41. maxframe/dataframe/merge/tests/test_merge.py +123 -0
  42. maxframe/dataframe/misc/apply.py +3 -10
  43. maxframe/dataframe/misc/case_when.py +1 -1
  44. maxframe/dataframe/misc/describe.py +2 -2
  45. maxframe/dataframe/misc/drop_duplicates.py +4 -25
  46. maxframe/dataframe/misc/eval.py +4 -0
  47. maxframe/dataframe/misc/pct_change.py +1 -83
  48. maxframe/dataframe/misc/transform.py +1 -30
  49. maxframe/dataframe/misc/value_counts.py +4 -17
  50. maxframe/dataframe/missing/dropna.py +1 -1
  51. maxframe/dataframe/missing/fillna.py +5 -5
  52. maxframe/dataframe/operators.py +1 -17
  53. maxframe/dataframe/reduction/core.py +2 -2
  54. maxframe/dataframe/sort/sort_values.py +1 -11
  55. maxframe/dataframe/statistics/quantile.py +5 -17
  56. maxframe/dataframe/utils.py +4 -7
  57. maxframe/io/objects/__init__.py +24 -0
  58. maxframe/io/objects/core.py +140 -0
  59. maxframe/io/objects/tensor.py +76 -0
  60. maxframe/io/objects/tests/__init__.py +13 -0
  61. maxframe/io/objects/tests/test_object_io.py +97 -0
  62. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  63. maxframe/{odpsio → io/odpsio}/arrow.py +12 -8
  64. maxframe/{odpsio → io/odpsio}/schema.py +15 -12
  65. maxframe/io/odpsio/tableio.py +702 -0
  66. maxframe/io/odpsio/tests/__init__.py +13 -0
  67. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +19 -18
  68. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  69. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  70. maxframe/io/odpsio/volumeio.py +57 -0
  71. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  72. maxframe/learn/contrib/xgboost/core.py +87 -2
  73. maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
  74. maxframe/learn/contrib/xgboost/predict.py +21 -7
  75. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  76. maxframe/learn/contrib/xgboost/train.py +27 -17
  77. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  78. maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
  79. maxframe/protocol.py +41 -17
  80. maxframe/remote/core.py +4 -8
  81. maxframe/serialization/__init__.py +1 -0
  82. maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
  83. maxframe/serialization/serializables/core.py +48 -9
  84. maxframe/tensor/__init__.py +69 -2
  85. maxframe/tensor/arithmetic/isclose.py +1 -0
  86. maxframe/tensor/arithmetic/tests/test_arithmetic.py +21 -17
  87. maxframe/tensor/core.py +5 -136
  88. maxframe/tensor/datasource/array.py +3 -0
  89. maxframe/tensor/datasource/full.py +1 -1
  90. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  91. maxframe/tensor/indexing/flatnonzero.py +1 -1
  92. maxframe/tensor/merge/__init__.py +2 -0
  93. maxframe/tensor/merge/concatenate.py +98 -0
  94. maxframe/tensor/merge/tests/test_merge.py +30 -1
  95. maxframe/tensor/merge/vstack.py +70 -0
  96. maxframe/tensor/{base → misc}/__init__.py +2 -0
  97. maxframe/tensor/{base → misc}/atleast_1d.py +0 -2
  98. maxframe/tensor/misc/atleast_2d.py +70 -0
  99. maxframe/tensor/misc/atleast_3d.py +85 -0
  100. maxframe/tensor/misc/tests/__init__.py +13 -0
  101. maxframe/tensor/{base → misc}/transpose.py +22 -18
  102. maxframe/tensor/{base → misc}/unique.py +2 -2
  103. maxframe/tensor/operators.py +1 -7
  104. maxframe/tensor/random/core.py +1 -1
  105. maxframe/tensor/reduction/count_nonzero.py +1 -0
  106. maxframe/tensor/reduction/mean.py +1 -0
  107. maxframe/tensor/reduction/nanmean.py +1 -0
  108. maxframe/tensor/reduction/nanvar.py +2 -0
  109. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  110. maxframe/tensor/reduction/var.py +2 -0
  111. maxframe/tensor/statistics/quantile.py +2 -2
  112. maxframe/tensor/utils.py +2 -22
  113. maxframe/tests/utils.py +11 -2
  114. maxframe/typing_.py +4 -1
  115. maxframe/udf.py +8 -9
  116. maxframe/utils.py +32 -70
  117. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/METADATA +2 -2
  118. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/RECORD +133 -123
  119. maxframe_client/fetcher.py +60 -68
  120. maxframe_client/session/graph.py +8 -2
  121. maxframe_client/session/odps.py +58 -22
  122. maxframe_client/tests/test_fetcher.py +21 -3
  123. maxframe_client/tests/test_session.py +27 -4
  124. maxframe/core/entity/chunks.py +0 -68
  125. maxframe/core/entity/fuse.py +0 -73
  126. maxframe/core/graph/builder/chunk.py +0 -430
  127. maxframe/odpsio/tableio.py +0 -322
  128. maxframe/odpsio/volumeio.py +0 -95
  129. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  130. /maxframe/{tensor/base/tests → io}/__init__.py +0 -0
  131. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  132. /maxframe/tensor/{base → misc}/astype.py +0 -0
  133. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  134. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  135. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  136. /maxframe/tensor/{base → misc}/where.py +0 -0
  137. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/WHEEL +0 -0
  138. {maxframe-1.0.0rc1.dist-info → maxframe-1.0.0rc3.dist-info}/top_level.txt +0 -0

maxframe/dataframe/misc/apply.py

@@ -319,6 +319,7 @@ def df_apply(
  skip_infer=False,
  **kwds,
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/50
  """
  Apply a function along an axis of the DataFrame.

@@ -444,20 +445,12 @@ def df_apply(
  B 27
  dtype: int64

- >>> df.apply(np.sum, axis=1).execute()
+ >>> df.apply(lambda row: int(np.sum(row)), axis=1).execute()
  0 13
  1 13
  2 13
  dtype: int64

- Returning a list-like will result in a Series
-
- >>> df.apply(lambda x: [1, 2], axis=1).execute()
- 0 [1, 2]
- 1 [1, 2]
- 2 [1, 2]
- dtype: object
-
  Passing ``result_type='expand'`` will expand list-like results
  to columns of a Dataframe

@@ -471,7 +464,7 @@ def df_apply(
  ``result_type='expand'``. The resulting column names
  will be the Series index.

- >>> df.apply(lambda x: md.Series([1, 2], index=['foo', 'bar']), axis=1).execute()
+ >>> df.apply(lambda x: pd.Series([1, 2], index=['foo', 'bar']), axis=1).execute()
  foo bar
  0 1 2
  1 1 2

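
The docstring now passes an explicit callable instead of bare `np.sum` and builds the expanded result with `pd.Series` rather than `md.Series`. A minimal sketch of the updated usage pattern, assuming a MaxFrame session already connected to a MaxCompute project; the example data is reconstructed from the doctest output above, not taken verbatim from the package:

    import numpy as np
    import pandas as pd
    import maxframe.dataframe as md

    # Example frame reconstructed from the doctest output (each row sums to 13).
    df = md.DataFrame([[4, 9]] * 3, columns=["A", "B"])

    # Per-row scalar result: wrap the reduction in a plain callable.
    row_sums = df.apply(lambda row: int(np.sum(row)), axis=1)

    # Series-returning UDF: the updated docstring builds it with pandas, not md.Series.
    expanded = df.apply(lambda x: pd.Series([1, 2], index=["foo", "bar"]), axis=1)

    # MaxFrame is lazy; .execute() submits the job and fetches the result.
    # row_sums.execute()
    # expanded.execute()
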
maxframe/dataframe/misc/case_when.py

@@ -99,7 +99,7 @@ def case_when(series, caselist):
  >>> b = md.Series([0, 3, 4, 5])

  >>> c.case_when(caselist=[(a.gt(0), a), # condition, replacement
- ... (b.gt(0), b)])
+ ... (b.gt(0), b)]).execute()
  0 6
  1 3
  2 1

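
The only change here is the trailing `.execute()`: MaxFrame objects are lazy, so the doctest chain now ends with an explicit execute call. A sketch of the pattern, assuming a live session; the values of `a` and `c` are taken from the corresponding pandas `case_when` example, not from this hunk:

    import maxframe.dataframe as md

    c = md.Series([6, 7, 8, 9], name="c")
    a = md.Series([0, 0, 1, 2])
    b = md.Series([0, 3, 4, 5])

    # The replacement comes from the first caselist entry whose condition holds.
    result = c.case_when(caselist=[(a.gt(0), a),   # condition, replacement
                                   (b.gt(0), b)])
    # result.execute()   # deferred execution, as in the updated docstring
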
maxframe/dataframe/misc/describe.py

@@ -15,7 +15,7 @@
  import numpy as np
  import pandas as pd

- from ... import opcodes as OperandDef
+ from ... import opcodes
  from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
  from ..core import SERIES_TYPE
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -23,7 +23,7 @@ from ..utils import build_empty_df, parse_index


  class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
- _op_type_ = OperandDef.DESCRIBE
+ _op_type_ = opcodes.DESCRIBE

  input = KeyField("input", default=None)
  percentiles = ListField("percentiles", FieldTypes.float64, default=None)

maxframe/dataframe/misc/drop_duplicates.py

@@ -37,16 +37,15 @@ class DataFrameDropDuplicates(DuplicateOperand):
  shape += (3,)
  return shape

- @classmethod
- def _gen_tileable_params(cls, op: "DataFrameDropDuplicates", input_params):
+ def _gen_tileable_params(self, op: "DataFrameDropDuplicates", input_params):
  params = input_params.copy()
- if op.ignore_index:
+ if op.ignore_index and self._output_types[0] != OutputType.index:
  params["index_value"] = parse_index(pd.RangeIndex(-1))
  else:
  params["index_value"] = gen_unknown_index_value(
  input_params["index_value"], op.keep, op.subset, type(op).__name__
  )
- params["shape"] = cls._get_shape(input_params["shape"], op)
+ params["shape"] = self._get_shape(input_params["shape"], op)
  return params

  def __call__(self, inp, inplace=False):
@@ -105,6 +104,7 @@ def df_drop_duplicates(
  def series_drop_duplicates(
  series, keep="first", inplace=False, ignore_index=False, method="auto"
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/12
  """
  Return Series with duplicate values removed.

@@ -148,27 +148,6 @@ def series_drop_duplicates(
  5 hippo
  Name: animal, dtype: object

- With the 'keep' parameter, the selection behaviour of duplicated values
- can be changed. The value 'first' keeps the first occurrence for each
- set of duplicated entries. The default value of keep is 'first'.
-
- >>> s.drop_duplicates().execute()
- 0 lame
- 1 cow
- 3 beetle
- 5 hippo
- Name: animal, dtype: object
-
- The value 'last' for parameter 'keep' keeps the last occurrence for
- each set of duplicated entries.
-
- >>> s.drop_duplicates(keep='last').execute()
- 1 cow
- 3 beetle
- 4 lame
- 5 hippo
- Name: animal, dtype: object
-
  The value ``False`` for parameter 'keep' discards all sets of
  duplicated entries. Setting the value of 'inplace' to ``True`` performs
  the operation inplace and returns ``None``.

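
The series used by the retained examples can be rebuilt from the deleted doctest output above; a sketch of the `keep=False` case that the docstring still documents, assuming a working MaxFrame session:

    import maxframe.dataframe as md

    # Values inferred from the doctest output shown in the hunk above.
    s = md.Series(["lame", "cow", "lame", "beetle", "lame", "hippo"], name="animal")

    # keep=False discards every set of duplicated entries ("lame" disappears).
    deduped = s.drop_duplicates(keep=False)
    # deduped.execute()
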
maxframe/dataframe/misc/eval.py

@@ -120,6 +120,10 @@ class CollectionVisitor(ast.NodeVisitor):
  if obj_name in self.env:
  self.referenced_vars.add(obj_name)
  return self.env[obj_name]
+ try:
+ return self.target[obj_name]
+ except KeyError:
+ pass
  raise KeyError(f"name {obj_name} is not defined")

  def visit(self, node):

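
The four added lines make `CollectionVisitor` fall back to looking a name up on `self.target` before raising. Assuming `target` is the frame being evaluated (an assumption, not stated in the hunk), this is what lets bare column names resolve in `eval`/`query` expressions; a hedged sketch:

    import maxframe.dataframe as md

    df = md.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # "a" and "b" are not local variables; with the added fallback they are
    # resolved via self.target[obj_name] instead of raising KeyError.
    expr = df.eval("a + b")
    # expr.execute()
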
maxframe/dataframe/misc/pct_change.py

@@ -18,6 +18,7 @@ from ..utils import validate_axis
  def pct_change(
  df_or_series, periods=1, fill_method="pad", limit=None, freq=None, **kwargs
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/32
  """
  Percentage change between the current and a prior element.

@@ -50,89 +51,6 @@ def pct_change(
  DataFrame.diff : Compute the difference of two elements in a DataFrame.
  Series.shift : Shift the index by some number of periods.
  DataFrame.shift : Shift the index by some number of periods.
-
- Examples
- --------
- **Series**
-
- >>> import maxframe.dataframe as md
-
- >>> s = md.Series([90, 91, 85])
- >>> s.execute()
- 0 90
- 1 91
- 2 85
- dtype: int64
-
- >>> s.pct_change().execute()
- 0 NaN
- 1 0.011111
- 2 -0.065934
- dtype: float64
-
- >>> s.pct_change(periods=2).execute()
- 0 NaN
- 1 NaN
- 2 -0.055556
- dtype: float64
-
- See the percentage change in a Series where filling NAs with last
- valid observation forward to next valid.
-
- >>> s = md.Series([90, 91, None, 85])
- >>> s.execute()
- 0 90.0
- 1 91.0
- 2 NaN
- 3 85.0
- dtype: float64
-
- >>> s.pct_change(fill_method='ffill').execute()
- 0 NaN
- 1 0.011111
- 2 0.000000
- 3 -0.065934
- dtype: float64
-
- **DataFrame**
-
- Percentage change in French franc, Deutsche Mark, and Italian lira from
- 1980-01-01 to 1980-03-01.
-
- >>> df = md.DataFrame({
- ... 'FR': [4.0405, 4.0963, 4.3149],
- ... 'GR': [1.7246, 1.7482, 1.8519],
- ... 'IT': [804.74, 810.01, 860.13]},
- ... index=['1980-01-01', '1980-02-01', '1980-03-01'])
- >>> df.execute()
- FR GR IT
- 1980-01-01 4.0405 1.7246 804.74
- 1980-02-01 4.0963 1.7482 810.01
- 1980-03-01 4.3149 1.8519 860.13
-
- >>> df.pct_change().execute()
- FR GR IT
- 1980-01-01 NaN NaN NaN
- 1980-02-01 0.013810 0.013684 0.006549
- 1980-03-01 0.053365 0.059318 0.061876
-
- Percentage of change in GOOG and APPL stock volume. Shows computing
- the percentage change between columns.
-
- >>> df = md.DataFrame({
- ... '2016': [1769950, 30586265],
- ... '2015': [1500923, 40912316],
- ... '2014': [1371819, 41403351]},
- ... index=['GOOG', 'APPL'])
- >>> df.execute()
- 2016 2015 2014
- GOOG 1769950 1500923 1371819
- APPL 30586265 40912316 41403351
-
- >>> df.pct_change(axis='columns').execute()
- 2016 2015 2014
- GOOG NaN -0.151997 -0.086016
- APPL NaN 0.337604 0.012002
  """

  axis = validate_axis(kwargs.pop("axis", 0))

maxframe/dataframe/misc/transform.py

@@ -228,21 +228,6 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
  0 1 2
  1 2 3
  2 3 4
-
- Even though the resulting DataFrame must have the same length as the
- input DataFrame, it is possible to provide several input functions:
-
- >>> s = md.Series(range(3))
- >>> s.execute()
- 0 0
- 1 1
- 2 2
- dtype: int64
- >>> s.transform([mt.sqrt, mt.exp]).execute()
- sqrt exp
- 0 0.000000 1.000000
- 1 1.000000 2.718282
- 2 1.414214 7.389056
  """
  op = TransformOperator(
  func=func,
@@ -265,6 +250,7 @@ def series_transform(
  dtype=None,
  **kwargs
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/10
  """
  Call ``func`` on self producing a Series with transformed values.

@@ -332,21 +318,6 @@ def series_transform(
  0 1 2
  1 2 3
  2 3 4
-
- Even though the resulting Series must have the same length as the
- input Series, it is possible to provide several input functions:
-
- >>> s = md.Series(range(3))
- >>> s.execute()
- 0 0
- 1 1
- 2 2
- dtype: int64
- >>> s.transform([mt.sqrt, mt.exp]).execute()
- sqrt exp
- 0 0.000000 1.000000
- 1 1.000000 2.718282
- 2 1.414214 7.389056
  """
  op = TransformOperator(
  func=func,

maxframe/dataframe/misc/value_counts.py

@@ -85,6 +85,7 @@ def value_counts(
  dropna=True,
  method="auto",
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/33
  """
  Return a Series containing counts of unique values.

@@ -125,9 +126,8 @@ def value_counts(
  Examples
  --------
  >>> import maxframe.dataframe as md
- >>> import maxframe.tensor as mt
-
- >>> s = md.Series([3, 1, 2, 3, 4, mt.nan])
+ >>> import numpy as np
+ >>> s = md.Series([3, 1, 2, 3, 4, np.nan])
  >>> s.value_counts().execute()
  3.0 2
  4.0 1
@@ -138,7 +138,7 @@ def value_counts(
  With `normalize` set to `True`, returns the relative frequency by
  dividing all values by the sum of values.

- >>> s = md.Series([3, 1, 2, 3, 4, mt.nan])
+ >>> s = md.Series([3, 1, 2, 3, 4, np.nan])
  >>> s.value_counts(normalize=True).execute()
  3.0 0.4
  4.0 0.2
@@ -146,19 +146,6 @@ def value_counts(
  1.0 0.2
  dtype: float64

- **bins**
-
- Bins can be useful for going from a continuous variable to a
- categorical variable; instead of counting unique
- apparitions of values, divide the index in the specified
- number of half-open bins.
-
- >>> s.value_counts(bins=3).execute()
- (2.0, 3.0] 2
- (0.996, 2.0] 2
- (3.0, 4.0] 1
- dtype: int64
-
  **dropna**

  With `dropna` set to `False` we can also see NaN index values.

maxframe/dataframe/missing/dropna.py

@@ -234,7 +234,7 @@ def series_dropna(series, axis=0, inplace=False, how=None):
  Empty strings are not considered NA values. ``None`` is considered an
  NA value.

- >>> ser = md.Series([np.NaN, 2, md.NaT, '', None, 'I stay'])
+ >>> ser = md.Series([np.NaN, '2', md.NaT, '', None, 'I stay'])
  >>> ser.execute()
  0 NaN
  1 2

maxframe/dataframe/missing/fillna.py

@@ -132,11 +132,11 @@ def fillna(
  --------
  >>> import maxframe.tensor as mt
  >>> import maxframe.dataframe as md
- >>> df = md.DataFrame([[mt.nan, 2, mt.nan, 0],
- ... [3, 4, mt.nan, 1],
- ... [mt.nan, mt.nan, mt.nan, 5],
- ... [mt.nan, 3, mt.nan, 4]],
- ... columns=list('ABCD'))
+ >>> df = md.DataFrame([[np.nan, 2, np.nan, 0],
+ [3, 4, np.nan, 1],
+ [np.nan, np.nan, np.nan, 5],
+ [np.nan, 3, np.nan, 4]],
+ columns=list('ABCD'))
  >>> df.execute()
  A B C D
  0 NaN 2.0 NaN 0

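
As in the value_counts change above, the fillna docstring now builds its example data with `np.nan` (a plain NumPy float) instead of `mt.nan`. A minimal sketch of the updated construction, assuming a working MaxFrame session; the `fillna(0)` call is the standard use of the documented function, not part of this hunk:

    import numpy as np
    import maxframe.dataframe as md

    # Example frame taken from the updated docstring above.
    df = md.DataFrame([[np.nan, 2, np.nan, 0],
                       [3, 4, np.nan, 1],
                       [np.nan, np.nan, np.nan, 5],
                       [np.nan, 3, np.nan, 4]],
                      columns=list("ABCD"))

    filled = df.fillna(0)   # replace every NaN with 0
    # filled.execute()
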
maxframe/dataframe/operators.py

@@ -16,13 +16,7 @@ import numpy as np
  import pandas as pd

  from ..core import ENTITY_TYPE, OutputType
- from ..core.operator import (
- Fuse,
- FuseChunkMixin,
- Operator,
- ShuffleProxy,
- TileableOperatorMixin,
- )
+ from ..core.operator import Operator, ShuffleProxy, TileableOperatorMixin
  from ..tensor.core import TENSOR_TYPE
  from ..tensor.datasource import tensor as astensor
  from .core import DATAFRAME_TYPE, SERIES_TYPE
@@ -261,13 +255,3 @@ DataFrameOperator = Operator
  class DataFrameShuffleProxy(ShuffleProxy, DataFrameOperatorMixin):
  def __init__(self, sparse=None, output_types=None, **kwargs):
  super().__init__(sparse=sparse, _output_types=output_types, **kwargs)
-
-
- class DataFrameFuseChunkMixin(FuseChunkMixin, DataFrameOperatorMixin):
- __slots__ = ()
-
-
- class DataFrameFuseChunk(Fuse, DataFrameFuseChunkMixin):
- @property
- def output_types(self):
- return self.outputs[-1].chunk.op.output_types

maxframe/dataframe/reduction/core.py

@@ -552,7 +552,7 @@ class ReductionCompiler:
  @enter_mode(build=True)
  def _compile_function(self, func, func_name=None, ndim=1) -> ReductionSteps:
  from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
- from ...tensor.base import TensorWhere
+ from ...tensor.misc import TensorWhere
  from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
  from ..datasource.dataframe import DataFrameDataSource
  from ..datasource.series import SeriesDataSource
@@ -679,8 +679,8 @@ class ReductionCompiler:
  ]
  """
  from ...tensor.arithmetic.core import TensorBinOp, TensorUnaryOp
- from ...tensor.base import TensorWhere
  from ...tensor.datasource import Scalar
+ from ...tensor.misc import TensorWhere
  from ..arithmetic.core import DataFrameBinOp, DataFrameUnaryOp
  from ..datasource.dataframe import DataFrameDataSource
  from ..datasource.series import SeriesDataSource

maxframe/dataframe/sort/sort_values.py

@@ -67,6 +67,7 @@ def dataframe_sort_values(
  parallel_kind="PSRS",
  psrs_kinds=None,
  ):
+ # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/15
  """
  Sort by the values along either axis.

@@ -152,17 +153,6 @@ def dataframe_sort_values(
  0 A 2 0
  1 A 1 1
  3 NaN 8 4
-
- Putting NAs first
-
- >>> df.sort_values(by='col1', ascending=False, na_position='first').execute()
- col1 col2 col3
- 3 NaN 8 4
- 4 D 7 2
- 5 C 4 3
- 2 B 9 9
- 0 A 2 0
- 1 A 1 1
  """

  if na_position not in ["last", "first"]: # pragma: no cover

maxframe/dataframe/statistics/quantile.py

@@ -14,8 +14,9 @@

  import numpy as np
  import pandas as pd
+ from pandas.core.dtypes.cast import find_common_type

- from ... import opcodes as OperandDef
+ from ... import opcodes
  from ...core import ENTITY_TYPE
  from ...serialization.serializables import (
  AnyField,
@@ -32,11 +33,11 @@ from ...tensor.datasource import tensor as astensor
  from ...tensor.statistics.quantile import quantile as tensor_quantile
  from ..core import DATAFRAME_TYPE
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
- from ..utils import build_empty_df, find_common_type, parse_index, validate_axis
+ from ..utils import build_empty_df, parse_index, validate_axis


  class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
- _op_type_ = OperandDef.QUANTILE
+ _op_type_ = opcodes.QUANTILE

  input = KeyField("input", default=None)
  q = AnyField("q", default=None)
@@ -259,6 +260,7 @@ def quantile_series(series, q=0.5, interpolation="linear"):


  def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
+ # FIXME: Timedelta not support. Data invalid: ODPS-0010000:InvalidArgument:duration[ns] is not equal to string
  """
  Return values at the given quantile over requested axis.

@@ -309,20 +311,6 @@ def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="line
  a b
  0.1 1.3 3.7
  0.5 2.5 55.0
-
- Specifying `numeric_only=False` will also compute the quantile of
- datetime and timedelta data.
-
- >>> df = md.DataFrame({'A': [1, 2],
- ... 'B': [md.Timestamp('2010'),
- ... md.Timestamp('2011')],
- ... 'C': [md.Timedelta('1 days'),
- ... md.Timedelta('2 days')]})
- >>> df.quantile(0.5, numeric_only=False).execute()
- A 1.5
- B 2010-07-02 12:00:00
- C 1 days 12:00:00
- Name: 0.5, dtype: object
  """
  if isinstance(q, ENTITY_TYPE):
  q = astensor(q)

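
Given the new FIXME about timedeltas, the remaining docstring sticks to numeric columns. A sketch assuming a live session; the input data is reconstructed from the retained doctest output (0.1 -> 1.3/3.7, 0.5 -> 2.5/55.0) and is not part of this hunk:

    import numpy as np
    import maxframe.dataframe as md

    # Data reconstructed from the kept doctest output above.
    df = md.DataFrame(np.array([[1, 1], [2, 10], [3, 100], [4, 100]]),
                      columns=["a", "b"])

    q = df.quantile([0.1, 0.5])   # numeric_only defaults to True per the signature above
    # q.execute()
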
maxframe/dataframe/utils.py

@@ -26,7 +26,6 @@ import numpy as np
  import pandas as pd
  from pandas.api.extensions import ExtensionDtype
  from pandas.api.types import is_string_dtype
- from pandas.core.dtypes.cast import find_common_type
  from pandas.core.dtypes.inference import is_dict_like, is_list_like

  from ..core import Entity, ExecutableTuple
@@ -477,11 +476,11 @@ def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
  else:
  fill_values = fill_value

- from .core import SERIES_TYPE
+ from .core import INDEX_TYPE, SERIES_TYPE

  dtypes = (
  pd.Series([df_obj.dtype], index=[df_obj.name])
- if isinstance(df_obj, SERIES_TYPE)
+ if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE))
  else df_obj.dtypes
  )
  for size, fill_value in zip(sizes, fill_values):
@@ -593,7 +592,7 @@ def build_series(
  return ret_series


- def infer_index_value(left_index_value, right_index_value):
+ def infer_index_value(left_index_value, right_index_value, level=None):
  from .core import IndexValue

  if isinstance(left_index_value.value, IndexValue.RangeIndex) and isinstance(
@@ -616,9 +615,7 @@ def infer_index_value(left_index_value, right_index_value):

  left_index = left_index_value.to_pandas()
  right_index = right_index_value.to_pandas()
- out_index = pd.Index(
- [], dtype=find_common_type([left_index.dtype, right_index.dtype])
- )
+ out_index = left_index.join(right_index, level=level)[:0]
  return parse_index(out_index, left_index_value, right_index_value)

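
The rewritten `infer_index_value` no longer computes a common dtype by hand: it joins the two pandas indexes and slices the result to length zero, which yields an empty index whose dtype (and, with `level=`, structure) comes from the join itself. A pandas-only sketch of the trick, with example values chosen here for illustration:

    import pandas as pd

    left = pd.Index([1, 2, 3])     # int64
    right = pd.Index([2.5, 3.5])   # float64

    # Join, then keep zero rows: an empty Index carrying the joined dtype.
    out = left.join(right)[:0]
    print(out.dtype)               # float64 on recent pandas

    # Old approach, for comparison:
    # pd.Index([], dtype=find_common_type([left.dtype, right.dtype]))
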
maxframe/io/objects/__init__.py (new file)

@@ -0,0 +1,24 @@
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ from .core import (
+ AbstractObjectIOHandler,
+ get_object_io_handler,
+ register_object_io_handler,
+ )
+
+ # isort: off
+ from . import tensor
+
+ del tensor

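
The new package exposes a registry of object IO handlers and registers the tensor handler as a side effect of `from . import tensor` (hence the `del tensor` afterwards). A generic sketch of the register/lookup idiom those names suggest; this is not maxframe's actual implementation, and the signatures below are assumptions:

    # Generic illustration of a type-keyed handler registry.
    _handlers = {}

    def register_object_io_handler(obj_type):
        def wrapper(handler_cls):
            _handlers[obj_type] = handler_cls
            return handler_cls
        return wrapper

    def get_object_io_handler(obj_type):
        # Walk the MRO so subclasses reuse a handler registered for a base type.
        for klass in obj_type.__mro__:
            if klass in _handlers:
                return _handlers[klass]
        raise KeyError(f"no IO handler registered for {obj_type!r}")

    # Usage sketch (hypothetical names):
    # @register_object_io_handler(SomeTileableType)
    # class SomeTileableIOHandler(AbstractObjectIOHandler): ...
    # handler = get_object_io_handler(SomeTileableType)()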