maxframe 0.1.0b4__cp39-cp39-win32.whl → 1.0.0__cp39-cp39-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp39-win32.pyd +0 -0
- maxframe/codegen.py +56 -5
- maxframe/config/config.py +78 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp39-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +2 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +58 -12
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +120 -24
- maxframe/dataframe/datasource/read_odps_table.py +9 -4
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +6 -11
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +93 -1
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +4 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +33 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +110 -0
- maxframe/learn/contrib/xgboost/core.py +241 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
- maxframe/learn/contrib/xgboost/predict.py +121 -0
- maxframe/learn/contrib/xgboost/regressor.py +71 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +132 -0
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cp39-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +11 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp39-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +64 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +37 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +7 -2
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +4 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/misc/unique.py +205 -0
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +62 -3
- maxframe/utils.py +112 -86
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +123 -54
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +223 -40
- maxframe_client/session/task.py +108 -80
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +136 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -300
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe/dataframe/__init__.py
CHANGED
|
@@ -46,6 +46,7 @@ from .misc.cut import cut
|
|
|
46
46
|
from .misc.eval import maxframe_eval as eval # pylint: disable=redefined-builtin
|
|
47
47
|
from .misc.get_dummies import get_dummies
|
|
48
48
|
from .misc.melt import melt
|
|
49
|
+
from .misc.pivot_table import pivot_table
|
|
49
50
|
from .misc.qcut import qcut
|
|
50
51
|
from .misc.to_numeric import to_numeric
|
|
51
52
|
from .missing import isna, isnull, notna, notnull
|
|
@@ -53,7 +54,7 @@ from .reduction import CustomReduction, unique
|
|
|
53
54
|
from .tseries.to_datetime import to_datetime
|
|
54
55
|
|
|
55
56
|
try:
|
|
56
|
-
from pandas import NA, Timestamp
|
|
57
|
+
from pandas import NA, NaT, Timestamp
|
|
57
58
|
except ImportError: # pragma: no cover
|
|
58
59
|
pass
|
|
59
60
|
|
|
@@ -43,20 +43,20 @@ def around(df, decimals=0, *args, **kwargs):
|
|
|
43
43
|
return op(df)
|
|
44
44
|
|
|
45
45
|
|
|
46
|
+
# FIXME Series input of decimals not supported yet
|
|
46
47
|
around.__frame_doc__ = """
|
|
47
48
|
Round a DataFrame to a variable number of decimal places.
|
|
48
49
|
|
|
49
50
|
Parameters
|
|
50
51
|
----------
|
|
51
|
-
decimals : int, dict
|
|
52
|
+
decimals : int, dict
|
|
52
53
|
Number of decimal places to round each column to. If an int is
|
|
53
54
|
given, round each column to the same number of places.
|
|
54
55
|
Otherwise dict and Series round to variable numbers of places.
|
|
55
56
|
Column names should be in the keys if `decimals` is a
|
|
56
|
-
dict-like
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
ignored.
|
|
57
|
+
dict-like. Any columns not included in `decimals` will be left
|
|
58
|
+
as is. Elements of `decimals` which are not columns of the
|
|
59
|
+
input will be ignored.
|
|
60
60
|
*args
|
|
61
61
|
Additional keywords have no effect but might be accepted for
|
|
62
62
|
compatibility with numpy.
|
|
@@ -107,18 +107,6 @@ places as value
|
|
|
107
107
|
1 0.0 1.0
|
|
108
108
|
2 0.7 0.0
|
|
109
109
|
3 0.2 0.0
|
|
110
|
-
|
|
111
|
-
Using a Series, the number of places for specific columns can be
|
|
112
|
-
specified with the column names as index and the number of
|
|
113
|
-
decimal places as value
|
|
114
|
-
|
|
115
|
-
>>> decimals = md.Series([0, 1], index=['cats', 'dogs'])
|
|
116
|
-
>>> df.round(decimals).execute()
|
|
117
|
-
dogs cats
|
|
118
|
-
0 0.2 0.0
|
|
119
|
-
1 0.0 1.0
|
|
120
|
-
2 0.7 0.0
|
|
121
|
-
3 0.2 0.0
|
|
122
110
|
"""
|
|
123
111
|
around.__series_doc__ = """
|
|
124
112
|
Round each value in a Series to the given number of decimals.
|
|
@@ -39,7 +39,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
39
39
|
raise NotImplementedError
|
|
40
40
|
|
|
41
41
|
@classmethod
|
|
42
|
-
def _calc_properties(cls, x1, x2=None, axis="columns"):
|
|
42
|
+
def _calc_properties(cls, x1, x2=None, axis="columns", level=None):
|
|
43
43
|
if isinstance(x1, DATAFRAME_TYPE) and (
|
|
44
44
|
x2 is None or pd.api.types.is_scalar(x2) or isinstance(x2, TENSOR_TYPE)
|
|
45
45
|
):
|
|
@@ -108,7 +108,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
108
108
|
index = copy.copy(x1.index_value)
|
|
109
109
|
index_shape = x1.shape[0]
|
|
110
110
|
else:
|
|
111
|
-
index = infer_index_value(
|
|
111
|
+
index = infer_index_value(
|
|
112
|
+
x1.index_value, x2.index_value, level=level
|
|
113
|
+
)
|
|
112
114
|
if index.key == x1.index_value.key == x2.index_value.key and (
|
|
113
115
|
not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
|
|
114
116
|
):
|
|
@@ -141,7 +143,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
141
143
|
column_shape = len(dtypes)
|
|
142
144
|
else: # pragma: no cover
|
|
143
145
|
dtypes = x1.dtypes # FIXME
|
|
144
|
-
columns = infer_index_value(
|
|
146
|
+
columns = infer_index_value(
|
|
147
|
+
x1.columns_value, x2.index_value, level=level
|
|
148
|
+
)
|
|
145
149
|
column_shape = np.nan
|
|
146
150
|
else:
|
|
147
151
|
assert axis == "index" or axis == 0
|
|
@@ -169,7 +173,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
169
173
|
],
|
|
170
174
|
index=x1.dtypes.index,
|
|
171
175
|
)
|
|
172
|
-
index = infer_index_value(
|
|
176
|
+
index = infer_index_value(
|
|
177
|
+
x1.index_value, x2.index_value, level=level
|
|
178
|
+
)
|
|
173
179
|
index_shape = np.nan
|
|
174
180
|
return {
|
|
175
181
|
"shape": (index_shape, column_shape),
|
|
@@ -187,7 +193,9 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
187
193
|
index = copy.copy(x1.index_value)
|
|
188
194
|
index_shape = x1.shape[0]
|
|
189
195
|
else:
|
|
190
|
-
index = infer_index_value(
|
|
196
|
+
index = infer_index_value(
|
|
197
|
+
x1.index_value, x2.index_value, level=level
|
|
198
|
+
)
|
|
191
199
|
if index.key == x1.index_value.key == x2.index_value.key and (
|
|
192
200
|
not np.isnan(x1.shape[0]) or not np.isnan(x2.shape[0])
|
|
193
201
|
):
|
|
@@ -237,14 +245,14 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
|
|
|
237
245
|
self._check_inputs(x1, x2)
|
|
238
246
|
if isinstance(x1, DATAFRAME_TYPE) or isinstance(x2, DATAFRAME_TYPE):
|
|
239
247
|
df1, df2 = (x1, x2) if isinstance(x1, DATAFRAME_TYPE) else (x2, x1)
|
|
240
|
-
kw = self._calc_properties(df1, df2, axis=self.axis)
|
|
248
|
+
kw = self._calc_properties(df1, df2, axis=self.axis, level=self.level)
|
|
241
249
|
if not pd.api.types.is_scalar(df2):
|
|
242
250
|
return self.new_dataframe([x1, x2], **kw)
|
|
243
251
|
else:
|
|
244
252
|
return self.new_dataframe([df1], **kw)
|
|
245
253
|
if isinstance(x1, SERIES_TYPE) or isinstance(x2, SERIES_TYPE):
|
|
246
254
|
s1, s2 = (x1, x2) if isinstance(x1, SERIES_TYPE) else (x2, x1)
|
|
247
|
-
kw = self._calc_properties(s1, s2)
|
|
255
|
+
kw = self._calc_properties(s1, s2, level=self.level)
|
|
248
256
|
if not pd.api.types.is_scalar(s2):
|
|
249
257
|
return self.new_series([x1, x2], **kw)
|
|
250
258
|
else:
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
# FIXME:https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/17
|
|
15
16
|
_flex_doc_FRAME = """
|
|
16
17
|
Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
|
|
17
18
|
Equivalent to ``{equiv}``, but with support to substitute a fill_value
|
|
@@ -127,44 +128,15 @@ circle 0
|
|
|
127
128
|
triangle 3
|
|
128
129
|
rectangle 4
|
|
129
130
|
|
|
130
|
-
>>> (df * other).execute()
|
|
131
|
-
angles degrees
|
|
132
|
-
circle 0 NaN
|
|
133
|
-
triangle 9 NaN
|
|
134
|
-
rectangle 16 NaN
|
|
135
|
-
|
|
136
131
|
>>> df.mul(other, fill_value=0).execute()
|
|
137
132
|
angles degrees
|
|
138
133
|
circle 0 0.0
|
|
139
134
|
triangle 9 0.0
|
|
140
135
|
rectangle 16 0.0
|
|
141
136
|
|
|
142
|
-
Divide by a MultiIndex by level.
|
|
143
|
-
|
|
144
|
-
>>> df_multindex = md.DataFrame({{'angles': [0, 3, 4, 4, 5, 6],
|
|
145
|
-
... 'degrees': [360, 180, 360, 360, 540, 720]}},
|
|
146
|
-
... index=[['A', 'A', 'A', 'B', 'B', 'B'],
|
|
147
|
-
... ['circle', 'triangle', 'rectangle',
|
|
148
|
-
... 'square', 'pentagon', 'hexagon']])
|
|
149
|
-
>>> df_multindex.execute()
|
|
150
|
-
angles degrees
|
|
151
|
-
A circle 0 360
|
|
152
|
-
triangle 3 180
|
|
153
|
-
rectangle 4 360
|
|
154
|
-
B square 4 360
|
|
155
|
-
pentagon 5 540
|
|
156
|
-
hexagon 6 720
|
|
157
|
-
|
|
158
|
-
>>> df.div(df_multindex, level=1, fill_value=0).execute()
|
|
159
|
-
angles degrees
|
|
160
|
-
A circle NaN 1.0
|
|
161
|
-
triangle 1.0 1.0
|
|
162
|
-
rectangle 1.0 1.0
|
|
163
|
-
B square 0.0 0.0
|
|
164
|
-
pentagon 0.0 0.0
|
|
165
|
-
hexagon 0.0 0.0
|
|
166
137
|
"""
|
|
167
138
|
|
|
139
|
+
# FIXME:https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/28
|
|
168
140
|
_flex_doc_SERIES = """
|
|
169
141
|
Return {desc} of series and other, element-wise (binary operator `{op_name}`).
|
|
170
142
|
|
|
@@ -257,7 +229,8 @@ Mismatched indices will be unioned together.
|
|
|
257
229
|
|
|
258
230
|
Examples
|
|
259
231
|
--------
|
|
260
|
-
>>>
|
|
232
|
+
>>> import maxframe.dataframe as md
|
|
233
|
+
>>> df = md.DataFrame({{'cost': [250, 150, 100],
|
|
261
234
|
... 'revenue': [100, 250, 300]}},
|
|
262
235
|
... index=['A', 'B', 'C'])
|
|
263
236
|
>>> df.execute()
|
|
@@ -317,7 +290,7 @@ C True False
|
|
|
317
290
|
|
|
318
291
|
Compare to a DataFrame of different shape.
|
|
319
292
|
|
|
320
|
-
>>> other =
|
|
293
|
+
>>> other = md.DataFrame({{'revenue': [300, 250, 100, 150]}},
|
|
321
294
|
... index=['A', 'B', 'C', 'D'])
|
|
322
295
|
>>> other.execute()
|
|
323
296
|
revenue
|
|
@@ -335,7 +308,7 @@ D False False
|
|
|
335
308
|
|
|
336
309
|
Compare to a MultiIndex by level.
|
|
337
310
|
|
|
338
|
-
>>> df_multindex =
|
|
311
|
+
>>> df_multindex = md.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
|
|
339
312
|
... 'revenue': [100, 250, 300, 200, 175, 225]}},
|
|
340
313
|
... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
|
|
341
314
|
... ['A', 'B', 'C', 'A', 'B', 'C']])
|
|
@@ -356,6 +329,7 @@ Q1 A True True
|
|
|
356
329
|
Q2 A False True
|
|
357
330
|
B True False
|
|
358
331
|
C True False
|
|
332
|
+
|
|
359
333
|
"""
|
|
360
334
|
|
|
361
335
|
|
|
@@ -51,6 +51,8 @@ dtype: bool
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
@bin_compare_doc("Equal to", equiv="==", series_example=_eq_example)
|
|
54
|
-
def eq(df, other, axis="columns", level=None):
|
|
55
|
-
op = DataFrameEqual(
|
|
54
|
+
def eq(df, other, axis="columns", level=None, fill_value=None):
|
|
55
|
+
op = DataFrameEqual(
|
|
56
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
57
|
+
)
|
|
56
58
|
return op(df, other)
|
|
@@ -52,6 +52,8 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Greater than", equiv=">", series_example=_gt_example)
|
|
55
|
-
def gt(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameGreater(
|
|
55
|
+
def gt(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameGreater(
|
|
57
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
58
|
+
)
|
|
57
59
|
return op(df, other)
|
|
@@ -52,6 +52,8 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Greater than or equal to", equiv=">=", series_example=_ge_example)
|
|
55
|
-
def ge(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameGreaterEqual(
|
|
55
|
+
def ge(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameGreaterEqual(
|
|
57
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
58
|
+
)
|
|
57
59
|
return op(df, other)
|
|
@@ -52,6 +52,6 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Less than", equiv="<", series_example=_lt_example)
|
|
55
|
-
def lt(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other)
|
|
55
|
+
def lt(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value)
|
|
57
57
|
return op(df, other)
|
|
@@ -52,6 +52,8 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Less than or equal to", equiv="<=", series_example=_le_example)
|
|
55
|
-
def le(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameLessEqual(
|
|
55
|
+
def le(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameLessEqual(
|
|
57
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
58
|
+
)
|
|
57
59
|
return op(df, other)
|
|
@@ -51,6 +51,8 @@ dtype: bool
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
@bin_compare_doc("Not equal to", equiv="!=", series_example=_ne_example)
|
|
54
|
-
def ne(df, other, axis="columns", level=None):
|
|
55
|
-
op = DataFrameNotEqual(
|
|
54
|
+
def ne(df, other, axis="columns", level=None, fill_value=None):
|
|
55
|
+
op = DataFrameNotEqual(
|
|
56
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
57
|
+
)
|
|
56
58
|
return op(df, other)
|
|
@@ -22,6 +22,7 @@ import pandas as pd
|
|
|
22
22
|
import pytest
|
|
23
23
|
|
|
24
24
|
from ....core import OperatorType
|
|
25
|
+
from ....tests.utils import assert_mf_index_dtype
|
|
25
26
|
from ....utils import dataslots
|
|
26
27
|
from ...core import IndexValue
|
|
27
28
|
from ...datasource.dataframe import from_pandas
|
|
@@ -164,7 +165,7 @@ def test_without_shuffle(func_name, func_opts):
|
|
|
164
165
|
pd.testing.assert_index_equal(
|
|
165
166
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
166
167
|
)
|
|
167
|
-
|
|
168
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
168
169
|
pd.testing.assert_index_equal(
|
|
169
170
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
170
171
|
)
|
|
@@ -176,7 +177,7 @@ def test_without_shuffle(func_name, func_opts):
|
|
|
176
177
|
pd.testing.assert_index_equal(
|
|
177
178
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
178
179
|
)
|
|
179
|
-
|
|
180
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
180
181
|
pd.testing.assert_index_equal(
|
|
181
182
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
182
183
|
)
|
|
@@ -239,6 +240,28 @@ def test_dataframe_and_series_with_shuffle(func_name, func_opts):
|
|
|
239
240
|
assert df2.columns_value.key != df1.columns_value.key
|
|
240
241
|
|
|
241
242
|
|
|
243
|
+
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
|
|
244
|
+
def test_dataframe_and_series_with_multiindex(func_name, func_opts):
|
|
245
|
+
data1 = pd.DataFrame(
|
|
246
|
+
np.random.rand(10, 10),
|
|
247
|
+
index=pd.MultiIndex.from_arrays(
|
|
248
|
+
[list("AAAAABBBBB"), [4, 9, 3, 2, 1, 5, 8, 6, 7, 10]]
|
|
249
|
+
),
|
|
250
|
+
columns=[4, 1, 3, 2, 10, 5, 9, 8, 6, 7],
|
|
251
|
+
)
|
|
252
|
+
data1 = to_boolean_if_needed(func_opts.func_name, data1)
|
|
253
|
+
df1 = from_pandas(data1, chunk_size=5)
|
|
254
|
+
s1 = from_pandas_series(data1[10].reset_index(level=0, drop=True), chunk_size=6)
|
|
255
|
+
|
|
256
|
+
df2 = getattr(df1, func_opts.func_name)(s1, level=1, axis=0)
|
|
257
|
+
|
|
258
|
+
# test df2's index and columns
|
|
259
|
+
assert df2.shape == (np.nan, df1.shape[1])
|
|
260
|
+
assert df2.index_value.key != df1.index_value.key
|
|
261
|
+
assert df2.index_value.names == df1.index_value.names
|
|
262
|
+
assert df2.columns_value.key == df1.columns_value.key
|
|
263
|
+
|
|
264
|
+
|
|
242
265
|
@pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
|
|
243
266
|
def test_series_and_series_with_align_map(func_name, func_opts):
|
|
244
267
|
data1 = pd.DataFrame(
|
|
@@ -348,7 +371,7 @@ def test_with_one_shuffle(func_name, func_opts):
|
|
|
348
371
|
pd.testing.assert_index_equal(
|
|
349
372
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
350
373
|
)
|
|
351
|
-
|
|
374
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
352
375
|
pd.testing.assert_index_equal(
|
|
353
376
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
354
377
|
)
|
|
@@ -381,7 +404,7 @@ def test_with_all_shuffle(func_name, func_opts):
|
|
|
381
404
|
pd.testing.assert_index_equal(
|
|
382
405
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
383
406
|
)
|
|
384
|
-
|
|
407
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
385
408
|
pd.testing.assert_index_equal(
|
|
386
409
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
387
410
|
)
|
|
@@ -411,7 +434,7 @@ def test_with_all_shuffle(func_name, func_opts):
|
|
|
411
434
|
pd.testing.assert_index_equal(
|
|
412
435
|
df6.columns_value.to_pandas(), func_opts.func(data4, data5).columns
|
|
413
436
|
)
|
|
414
|
-
|
|
437
|
+
assert_mf_index_dtype(df6.index_value.value, np.int64)
|
|
415
438
|
pd.testing.assert_index_equal(
|
|
416
439
|
df6.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
417
440
|
)
|
|
@@ -446,7 +469,7 @@ def test_without_shuffle_and_with_one_chunk(func_name, func_opts):
|
|
|
446
469
|
pd.testing.assert_index_equal(
|
|
447
470
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
448
471
|
)
|
|
449
|
-
|
|
472
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
450
473
|
pd.testing.assert_index_equal(
|
|
451
474
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
452
475
|
)
|
|
@@ -479,7 +502,7 @@ def test_both_one_chunk(func_name, func_opts):
|
|
|
479
502
|
pd.testing.assert_index_equal(
|
|
480
503
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
481
504
|
)
|
|
482
|
-
|
|
505
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
483
506
|
pd.testing.assert_index_equal(
|
|
484
507
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
485
508
|
)
|
|
@@ -512,7 +535,7 @@ def test_with_shuffle_and_one_chunk(func_name, func_opts):
|
|
|
512
535
|
pd.testing.assert_index_equal(
|
|
513
536
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
514
537
|
)
|
|
515
|
-
|
|
538
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
516
539
|
pd.testing.assert_index_equal(
|
|
517
540
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
518
541
|
)
|
|
@@ -536,7 +559,7 @@ def test_on_same_dataframe(func_name, func_opts):
|
|
|
536
559
|
pd.testing.assert_index_equal(
|
|
537
560
|
df2.columns_value.to_pandas(), func_opts.func(data, data).columns
|
|
538
561
|
)
|
|
539
|
-
|
|
562
|
+
assert_mf_index_dtype(df2.index_value.value, np.int64)
|
|
540
563
|
pd.testing.assert_index_equal(
|
|
541
564
|
df2.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
542
565
|
)
|
|
@@ -568,19 +591,19 @@ def test_dataframe_and_scalar(func_name, func_opts):
|
|
|
568
591
|
pd.testing.assert_series_equal(result.dtypes, expected.dtypes)
|
|
569
592
|
|
|
570
593
|
pd.testing.assert_index_equal(result.columns_value.to_pandas(), data.columns)
|
|
571
|
-
|
|
594
|
+
assert_mf_index_dtype(result.index_value.value, np.int64)
|
|
572
595
|
|
|
573
596
|
pd.testing.assert_index_equal(result2.columns_value.to_pandas(), data.columns)
|
|
574
|
-
|
|
597
|
+
assert_mf_index_dtype(result2.index_value.value, np.int64)
|
|
575
598
|
|
|
576
599
|
pd.testing.assert_index_equal(result3.columns_value.to_pandas(), data.columns)
|
|
577
|
-
|
|
600
|
+
assert_mf_index_dtype(result3.index_value.value, np.int64)
|
|
578
601
|
|
|
579
602
|
pd.testing.assert_index_equal(result4.columns_value.to_pandas(), data.columns)
|
|
580
|
-
|
|
603
|
+
assert_mf_index_dtype(result4.index_value.value, np.int64)
|
|
581
604
|
|
|
582
605
|
pd.testing.assert_index_equal(result5.columns_value.to_pandas(), data.columns)
|
|
583
|
-
|
|
606
|
+
assert_mf_index_dtype(result5.index_value.value, np.int64)
|
|
584
607
|
|
|
585
608
|
if "builtin_function_or_method" not in str(type(func_opts.func)):
|
|
586
609
|
# skip NotImplemented test for comparison function
|
|
@@ -657,7 +680,7 @@ def test_abs():
|
|
|
657
680
|
pd.testing.assert_index_equal(
|
|
658
681
|
df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
|
|
659
682
|
)
|
|
660
|
-
|
|
683
|
+
assert_mf_index_dtype(df2.index_value.value, np.int64)
|
|
661
684
|
assert df2.shape == (10, 10)
|
|
662
685
|
|
|
663
686
|
|
|
@@ -675,7 +698,7 @@ def test_not():
|
|
|
675
698
|
pd.testing.assert_index_equal(
|
|
676
699
|
df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
|
|
677
700
|
)
|
|
678
|
-
|
|
701
|
+
assert_mf_index_dtype(df2.index_value.value, np.int64)
|
|
679
702
|
assert df2.shape == (10, 10)
|
|
680
703
|
|
|
681
704
|
|
maxframe/dataframe/core.py
CHANGED
|
@@ -35,6 +35,7 @@ from ..core import (
|
|
|
35
35
|
register_output_types,
|
|
36
36
|
)
|
|
37
37
|
from ..core.entity.utils import refresh_tileable_shape
|
|
38
|
+
from ..protocol import DataFrameTableMeta
|
|
38
39
|
from ..serialization.serializables import (
|
|
39
40
|
AnyField,
|
|
40
41
|
BoolField,
|
|
@@ -59,7 +60,13 @@ from ..utils import (
|
|
|
59
60
|
on_serialize_numpy_type,
|
|
60
61
|
tokenize,
|
|
61
62
|
)
|
|
62
|
-
from .utils import
|
|
63
|
+
from .utils import (
|
|
64
|
+
ReprSeries,
|
|
65
|
+
apply_if_callable,
|
|
66
|
+
fetch_corner_data,
|
|
67
|
+
merge_index_value,
|
|
68
|
+
parse_index,
|
|
69
|
+
)
|
|
63
70
|
|
|
64
71
|
|
|
65
72
|
class IndexValue(Serializable):
|
|
@@ -135,6 +142,14 @@ class IndexValue(Serializable):
|
|
|
135
142
|
_data = NDArrayField("data")
|
|
136
143
|
_dtype = DataTypeField("dtype")
|
|
137
144
|
|
|
145
|
+
@property
|
|
146
|
+
def dtype(self):
|
|
147
|
+
return getattr(self, "_dtype", None)
|
|
148
|
+
|
|
149
|
+
@property
|
|
150
|
+
def inferred_type(self):
|
|
151
|
+
return "floating" if self.dtype.kind == "f" else "integer"
|
|
152
|
+
|
|
138
153
|
class RangeIndex(IndexBase):
|
|
139
154
|
_name = AnyField("name")
|
|
140
155
|
_slice = SliceField("slice")
|
|
@@ -236,6 +251,10 @@ class IndexValue(Serializable):
|
|
|
236
251
|
_data = NDArrayField("data")
|
|
237
252
|
_dtype = DataTypeField("dtype")
|
|
238
253
|
|
|
254
|
+
@property
|
|
255
|
+
def dtype(self):
|
|
256
|
+
return getattr(self, "_dtype", None)
|
|
257
|
+
|
|
239
258
|
@property
|
|
240
259
|
def inferred_type(self):
|
|
241
260
|
return "integer"
|
|
@@ -247,6 +266,10 @@ class IndexValue(Serializable):
|
|
|
247
266
|
_data = NDArrayField("data")
|
|
248
267
|
_dtype = DataTypeField("dtype")
|
|
249
268
|
|
|
269
|
+
@property
|
|
270
|
+
def dtype(self):
|
|
271
|
+
return getattr(self, "_dtype", None)
|
|
272
|
+
|
|
250
273
|
@property
|
|
251
274
|
def inferred_type(self):
|
|
252
275
|
return "integer"
|
|
@@ -258,6 +281,10 @@ class IndexValue(Serializable):
|
|
|
258
281
|
_data = NDArrayField("data")
|
|
259
282
|
_dtype = DataTypeField("dtype")
|
|
260
283
|
|
|
284
|
+
@property
|
|
285
|
+
def dtype(self):
|
|
286
|
+
return getattr(self, "_dtype", None)
|
|
287
|
+
|
|
261
288
|
@property
|
|
262
289
|
def inferred_type(self):
|
|
263
290
|
return "floating"
|
|
@@ -616,6 +643,9 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
|
|
|
616
643
|
if self._name is None:
|
|
617
644
|
self._name = self.chunks[0].name
|
|
618
645
|
|
|
646
|
+
def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
|
|
647
|
+
pass
|
|
648
|
+
|
|
619
649
|
def _to_str(self, representation=False):
|
|
620
650
|
if is_build_mode() or len(self._executed_sessions) == 0:
|
|
621
651
|
# in build mode, or not executed, just return representation
|
|
@@ -945,6 +975,9 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
|
|
|
945
975
|
if self._name is None:
|
|
946
976
|
self._name = self.chunks[0].name
|
|
947
977
|
|
|
978
|
+
def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
|
|
979
|
+
pass
|
|
980
|
+
|
|
948
981
|
def _to_str(self, representation=False):
|
|
949
982
|
if is_build_mode() or len(self._executed_sessions) == 0:
|
|
950
983
|
# in build mode, or not executed, just return representation
|
|
@@ -978,7 +1011,7 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
|
|
|
978
1011
|
return self._to_str(representation=False)
|
|
979
1012
|
|
|
980
1013
|
def __repr__(self):
|
|
981
|
-
return self._to_str(representation=False)
|
|
1014
|
+
return self._to_str(representation=True)
|
|
982
1015
|
|
|
983
1016
|
@property
|
|
984
1017
|
def dtype(self):
|
|
@@ -1073,11 +1106,11 @@ class Series(HasShapeTileable, _ToPandasMixin):
|
|
|
1073
1106
|
--------
|
|
1074
1107
|
>>> import maxframe.dataframe as md
|
|
1075
1108
|
>>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
|
|
1076
|
-
>>> s.ndim
|
|
1109
|
+
>>> s.ndim
|
|
1077
1110
|
1
|
|
1078
1111
|
|
|
1079
1112
|
>>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
|
|
1080
|
-
>>> df.ndim
|
|
1113
|
+
>>> df.ndim
|
|
1081
1114
|
2
|
|
1082
1115
|
"""
|
|
1083
1116
|
return super().ndim
|
|
@@ -1501,6 +1534,17 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1501
1534
|
refresh_index_value(self)
|
|
1502
1535
|
refresh_dtypes(self)
|
|
1503
1536
|
|
|
1537
|
+
def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
|
|
1538
|
+
self._dtypes = dtypes
|
|
1539
|
+
self._columns_value = parse_index(dtypes.index, store_data=True)
|
|
1540
|
+
self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
|
|
1541
|
+
new_shape = list(self._shape)
|
|
1542
|
+
new_shape[-1] = len(dtypes)
|
|
1543
|
+
self._shape = tuple(new_shape)
|
|
1544
|
+
|
|
1545
|
+
def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
|
|
1546
|
+
self.refresh_from_dtypes(table_meta.pd_column_dtypes)
|
|
1547
|
+
|
|
1504
1548
|
@property
|
|
1505
1549
|
def dtypes(self):
|
|
1506
1550
|
dt = getattr(self, "_dtypes", None)
|
|
@@ -1644,6 +1688,8 @@ class DataFrameData(_BatchedFetcher, BaseDataFrameData):
|
|
|
1644
1688
|
raise NotImplementedError
|
|
1645
1689
|
|
|
1646
1690
|
corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
|
|
1691
|
+
if corner_data is None:
|
|
1692
|
+
return
|
|
1647
1693
|
|
|
1648
1694
|
buf = StringIO()
|
|
1649
1695
|
max_rows = pd.get_option("display.max_rows")
|
|
@@ -1739,11 +1785,11 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
|
|
|
1739
1785
|
--------
|
|
1740
1786
|
>>> import maxframe.dataframe as md
|
|
1741
1787
|
>>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
|
|
1742
|
-
>>> s.ndim
|
|
1788
|
+
>>> s.ndim
|
|
1743
1789
|
1
|
|
1744
1790
|
|
|
1745
1791
|
>>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
|
|
1746
|
-
>>> df.ndim
|
|
1792
|
+
>>> df.ndim
|
|
1747
1793
|
2
|
|
1748
1794
|
"""
|
|
1749
1795
|
return super().ndim
|
|
@@ -1997,12 +2043,6 @@ class DataFrame(HasShapeTileable, _ToPandasMixin):
|
|
|
1997
2043
|
Berkeley 25.0 77.0 298.15
|
|
1998
2044
|
"""
|
|
1999
2045
|
|
|
2000
|
-
def apply_if_callable(maybe_callable, obj, **kwargs):
|
|
2001
|
-
if callable(maybe_callable):
|
|
2002
|
-
return maybe_callable(obj, **kwargs)
|
|
2003
|
-
|
|
2004
|
-
return maybe_callable
|
|
2005
|
-
|
|
2006
2046
|
data = self.copy()
|
|
2007
2047
|
|
|
2008
2048
|
for k, v in kwargs.items():
|
|
@@ -2197,6 +2237,9 @@ class CategoricalData(HasShapeTileableData, _ToPandasMixin):
|
|
|
2197
2237
|
pd.Categorical(categories).categories, store_data=True
|
|
2198
2238
|
)
|
|
2199
2239
|
|
|
2240
|
+
def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
|
|
2241
|
+
pass
|
|
2242
|
+
|
|
2200
2243
|
def _to_str(self, representation=False):
|
|
2201
2244
|
if is_build_mode() or len(self._executed_sessions) == 0:
|
|
2202
2245
|
# in build mode, or not executed, just return representation
|
|
@@ -2347,6 +2390,9 @@ class DataFrameOrSeriesData(HasShapeTileableData, _ToPandasMixin):
|
|
|
2347
2390
|
data_params["name"] = self.chunks[0].name
|
|
2348
2391
|
self._data_params.update(data_params)
|
|
2349
2392
|
|
|
2393
|
+
def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
|
|
2394
|
+
pass
|
|
2395
|
+
|
|
2350
2396
|
def ensure_data(self):
|
|
2351
2397
|
from .fetch.core import DataFrameFetch
|
|
2352
2398
|
|
|
@@ -22,7 +22,7 @@ from pandas._libs.tslibs import timezones
|
|
|
22
22
|
from pandas.tseries.frequencies import to_offset
|
|
23
23
|
from pandas.tseries.offsets import Tick
|
|
24
24
|
|
|
25
|
-
from ... import opcodes
|
|
25
|
+
from ... import opcodes
|
|
26
26
|
from ...core import OutputType
|
|
27
27
|
from ...serialization.serializables import AnyField, BoolField, Int64Field, StringField
|
|
28
28
|
from ...utils import no_default, pd_release_version
|
|
@@ -117,7 +117,7 @@ def generate_range_count(
|
|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
class DataFrameDateRange(DataFrameOperator, DataFrameOperatorMixin):
|
|
120
|
-
_op_type_ =
|
|
120
|
+
_op_type_ = opcodes.DATE_RANGE
|
|
121
121
|
|
|
122
122
|
start = AnyField("start")
|
|
123
123
|
end = AnyField("end")
|