maxframe 1.0.0rc3__cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 1.1.0__cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cpython-310-aarch64-linux-gnu.so +0 -0
- maxframe/codegen.py +1 -0
- maxframe/config/config.py +16 -1
- maxframe/conftest.py +52 -14
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/graph/core.cpython-310-aarch64-linux-gnu.so +0 -0
- maxframe/core/operator/base.py +2 -0
- maxframe/dataframe/arithmetic/docstring.py +26 -2
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
- maxframe/dataframe/core.py +26 -2
- maxframe/dataframe/datasource/read_odps_query.py +116 -28
- maxframe/dataframe/datasource/read_odps_table.py +3 -1
- maxframe/dataframe/datasource/tests/test_datasource.py +93 -12
- maxframe/dataframe/datastore/to_odps.py +7 -0
- maxframe/dataframe/extensions/__init__.py +8 -0
- maxframe/dataframe/extensions/apply_chunk.py +649 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +314 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/__init__.py +1 -0
- maxframe/dataframe/groupby/aggregation.py +1 -0
- maxframe/dataframe/groupby/apply.py +9 -1
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
- maxframe/dataframe/groupby/transform.py +8 -2
- maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +11 -0
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +1 -1
- maxframe/dataframe/merge/tests/test_merge.py +3 -1
- maxframe/dataframe/misc/apply.py +3 -0
- maxframe/dataframe/misc/drop_duplicates.py +23 -2
- maxframe/dataframe/misc/map.py +3 -1
- maxframe/dataframe/misc/tests/test_misc.py +24 -2
- maxframe/dataframe/misc/transform.py +22 -13
- maxframe/dataframe/reduction/__init__.py +3 -0
- maxframe/dataframe/reduction/aggregation.py +1 -0
- maxframe/dataframe/reduction/median.py +56 -0
- maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
- maxframe/dataframe/statistics/quantile.py +8 -2
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/tests/test_utils.py +60 -0
- maxframe/dataframe/utils.py +110 -7
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/io/objects/tests/test_object_io.py +39 -12
- maxframe/io/odpsio/arrow.py +30 -2
- maxframe/io/odpsio/schema.py +28 -8
- maxframe/io/odpsio/tableio.py +55 -133
- maxframe/io/odpsio/tests/test_schema.py +40 -4
- maxframe/io/odpsio/tests/test_tableio.py +5 -5
- maxframe/io/odpsio/tests/test_volumeio.py +35 -11
- maxframe/io/odpsio/volumeio.py +36 -6
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +3 -3
- maxframe/learn/contrib/xgboost/predict.py +8 -39
- maxframe/learn/contrib/xgboost/train.py +4 -3
- maxframe/lib/mmh3.cpython-310-aarch64-linux-gnu.so +0 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/opcodes.py +10 -1
- maxframe/protocol.py +6 -1
- maxframe/serialization/core.cpython-310-aarch64-linux-gnu.so +0 -0
- maxframe/serialization/core.pyx +13 -1
- maxframe/serialization/pandas.py +50 -20
- maxframe/serialization/serializables/core.py +24 -5
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +8 -1
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +19 -7
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/concatenate.py +23 -20
- maxframe/tensor/merge/vstack.py +5 -1
- maxframe/tensor/misc/transpose.py +1 -1
- maxframe/tests/utils.py +16 -0
- maxframe/udf.py +27 -0
- maxframe/utils.py +64 -14
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/RECORD +570 -554
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/WHEEL +1 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +28 -10
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/odps.py +104 -20
- maxframe_client/session/task.py +42 -26
- maxframe_client/session/tests/test_task.py +0 -4
- maxframe_client/tests/test_session.py +44 -12
- {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
|
Binary file
|
maxframe/codegen.py
CHANGED
|
@@ -347,6 +347,7 @@ BUILTIN_ENGINE_SPE = "SPE"
|
|
|
347
347
|
BUILTIN_ENGINE_MCSQL = "MCSQL"
|
|
348
348
|
|
|
349
349
|
FAST_RANGE_INDEX_ENABLED = "codegen.fast_range_index_enabled"
|
|
350
|
+
ROW_NUMBER_WINDOW_INDEX_ENABLED = "codegen.row_number_window_index_enabled"
|
|
350
351
|
|
|
351
352
|
|
|
352
353
|
class BigDagCodeGenerator(metaclass=abc.ABCMeta):
|
maxframe/config/config.py
CHANGED
|
@@ -343,6 +343,9 @@ default_options.register_option("sql.enable_mcqa", True, validator=is_bool, remo
|
|
|
343
343
|
default_options.register_option(
|
|
344
344
|
"sql.generate_comments", True, validator=is_bool, remote=True
|
|
345
345
|
)
|
|
346
|
+
default_options.register_option(
|
|
347
|
+
"sql.auto_use_common_image", True, validator=is_bool, remote=True
|
|
348
|
+
)
|
|
346
349
|
default_options.register_option("sql.settings", {}, validator=is_dict, remote=True)
|
|
347
350
|
|
|
348
351
|
default_options.register_option("is_production", False, validator=is_bool, remote=True)
|
|
@@ -371,13 +374,25 @@ default_options.register_option(
|
|
|
371
374
|
validator=is_numeric,
|
|
372
375
|
remote=True,
|
|
373
376
|
)
|
|
377
|
+
default_options.register_option(
|
|
378
|
+
"session.quota_name", None, validator=is_null | is_string, remote=True
|
|
379
|
+
)
|
|
380
|
+
default_options.register_option(
|
|
381
|
+
"session.enable_schema", None, validator=is_null | is_bool, remote=True
|
|
382
|
+
)
|
|
383
|
+
default_options.register_option(
|
|
384
|
+
"session.enable_high_availability", None, validator=is_null | is_bool, remote=True
|
|
385
|
+
)
|
|
386
|
+
default_options.register_option(
|
|
387
|
+
"session.default_schema", None, validator=is_null | is_string, remote=True
|
|
388
|
+
)
|
|
374
389
|
default_options.register_option(
|
|
375
390
|
"session.upload_batch_size",
|
|
376
391
|
_DEFAULT_UPLOAD_BATCH_SIZE,
|
|
377
392
|
validator=is_integer,
|
|
378
393
|
)
|
|
379
394
|
default_options.register_option(
|
|
380
|
-
"session.table_lifecycle", None, validator=is_null | is_integer
|
|
395
|
+
"session.table_lifecycle", None, validator=is_null | is_integer, remote=True
|
|
381
396
|
)
|
|
382
397
|
default_options.register_option(
|
|
383
398
|
"session.temp_table_lifecycle",
|
maxframe/conftest.py
CHANGED
|
@@ -14,10 +14,11 @@
|
|
|
14
14
|
|
|
15
15
|
import faulthandler
|
|
16
16
|
import os
|
|
17
|
-
from configparser import ConfigParser, NoOptionError
|
|
17
|
+
from configparser import ConfigParser, NoOptionError, NoSectionError
|
|
18
18
|
|
|
19
19
|
import pytest
|
|
20
20
|
from odps import ODPS
|
|
21
|
+
from odps.accounts import BearerTokenAccount
|
|
21
22
|
|
|
22
23
|
from .config import options
|
|
23
24
|
|
|
@@ -34,12 +35,23 @@ def test_config():
|
|
|
34
35
|
return config
|
|
35
36
|
|
|
36
37
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
38
|
+
def _get_odps_env(test_config: ConfigParser, section_name: str) -> ODPS:
|
|
39
|
+
try:
|
|
40
|
+
access_id = test_config.get(section_name, "access_id")
|
|
41
|
+
except NoOptionError:
|
|
42
|
+
access_id = test_config.get("odps", "access_id")
|
|
43
|
+
try:
|
|
44
|
+
secret_access_key = test_config.get(section_name, "secret_access_key")
|
|
45
|
+
except NoOptionError:
|
|
46
|
+
secret_access_key = test_config.get("odps", "secret_access_key")
|
|
47
|
+
try:
|
|
48
|
+
project = test_config.get(section_name, "project")
|
|
49
|
+
except NoOptionError:
|
|
50
|
+
project = test_config.get("odps", "project")
|
|
51
|
+
try:
|
|
52
|
+
endpoint = test_config.get(section_name, "endpoint")
|
|
53
|
+
except NoOptionError:
|
|
54
|
+
endpoint = test_config.get("odps", "endpoint")
|
|
43
55
|
try:
|
|
44
56
|
tunnel_endpoint = test_config.get("odps", "tunnel_endpoint")
|
|
45
57
|
except NoOptionError:
|
|
@@ -55,12 +67,31 @@ def odps_envs(test_config):
|
|
|
55
67
|
],
|
|
56
68
|
}
|
|
57
69
|
token = entry.get_project().generate_auth_token(policy, "bearer", 5)
|
|
70
|
+
return ODPS(
|
|
71
|
+
account=BearerTokenAccount(token, 5),
|
|
72
|
+
project=project,
|
|
73
|
+
endpoint=endpoint,
|
|
74
|
+
tunnel_endpoint=tunnel_endpoint,
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@pytest.fixture(scope="session")
|
|
79
|
+
def odps_with_schema(test_config):
|
|
80
|
+
try:
|
|
81
|
+
return _get_odps_env(test_config, "odps_with_schema")
|
|
82
|
+
except NoSectionError:
|
|
83
|
+
pytest.skip("Need to specify odps_with_schema section in test.conf")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@pytest.fixture(scope="session", autouse=True)
|
|
87
|
+
def odps_envs(test_config):
|
|
88
|
+
entry = _get_odps_env(test_config, "odps")
|
|
58
89
|
|
|
59
|
-
os.environ["ODPS_BEARER_TOKEN"] = token
|
|
60
|
-
os.environ["ODPS_PROJECT_NAME"] = project
|
|
61
|
-
os.environ["ODPS_ENDPOINT"] = endpoint
|
|
62
|
-
if tunnel_endpoint:
|
|
63
|
-
os.environ["ODPS_TUNNEL_ENDPOINT"] = tunnel_endpoint
|
|
90
|
+
os.environ["ODPS_BEARER_TOKEN"] = entry.account.token
|
|
91
|
+
os.environ["ODPS_PROJECT_NAME"] = entry.project
|
|
92
|
+
os.environ["ODPS_ENDPOINT"] = entry.endpoint
|
|
93
|
+
if entry.tunnel_endpoint:
|
|
94
|
+
os.environ["ODPS_TUNNEL_ENDPOINT"] = entry.tunnel_endpoint
|
|
64
95
|
|
|
65
96
|
try:
|
|
66
97
|
yield
|
|
@@ -95,7 +126,14 @@ def oss_config():
|
|
|
95
126
|
oss_rolearn = config.get("oss", "rolearn")
|
|
96
127
|
|
|
97
128
|
options.service_role_arn = oss_rolearn
|
|
98
|
-
|
|
129
|
+
if "test" in oss_endpoint:
|
|
130
|
+
oss_svc_endpoint = oss_endpoint
|
|
131
|
+
else:
|
|
132
|
+
endpoint_parts = oss_endpoint.split(".", 1)
|
|
133
|
+
if "-internal" not in endpoint_parts[0]:
|
|
134
|
+
endpoint_parts[0] += "-internal"
|
|
135
|
+
oss_svc_endpoint = ".".join(endpoint_parts)
|
|
136
|
+
options.object_cache_url = f"oss://{oss_svc_endpoint}/{oss_bucket_name}"
|
|
99
137
|
|
|
100
138
|
config.oss_config = (
|
|
101
139
|
oss_access_id,
|
|
@@ -110,7 +148,7 @@ def oss_config():
|
|
|
110
148
|
config.oss_bucket = oss2.Bucket(auth, oss_endpoint, oss_bucket_name)
|
|
111
149
|
config.oss_rolearn = oss_rolearn
|
|
112
150
|
yield config
|
|
113
|
-
except (
|
|
151
|
+
except (NoSectionError, NoOptionError, ImportError):
|
|
114
152
|
return None
|
|
115
153
|
finally:
|
|
116
154
|
options.service_role_arn = old_role_arn
|
|
Binary file
|
maxframe/core/operator/base.py
CHANGED
|
@@ -86,6 +86,8 @@ class SchedulingHint(Serializable):
|
|
|
86
86
|
# `gpu` indicates that if the operator should be executed on the GPU.
|
|
87
87
|
gpu = BoolField("gpu", default=None)
|
|
88
88
|
priority = Int32Field("priority", default=None)
|
|
89
|
+
expect_engine = StringField("expect_engine", default=None)
|
|
90
|
+
expect_resources = DictField("expect_resources", FieldTypes.string, default=None)
|
|
89
91
|
|
|
90
92
|
@classproperty
|
|
91
93
|
@lru_cache(1)
|
|
@@ -185,7 +185,6 @@ e NaN
|
|
|
185
185
|
dtype: float64
|
|
186
186
|
"""
|
|
187
187
|
|
|
188
|
-
# FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/48
|
|
189
188
|
_flex_comp_doc_FRAME = """
|
|
190
189
|
Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
|
|
191
190
|
Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison
|
|
@@ -291,7 +290,7 @@ C True False
|
|
|
291
290
|
|
|
292
291
|
Compare to a DataFrame of different shape.
|
|
293
292
|
|
|
294
|
-
>>> other =
|
|
293
|
+
>>> other = md.DataFrame({{'revenue': [300, 250, 100, 150]}},
|
|
295
294
|
... index=['A', 'B', 'C', 'D'])
|
|
296
295
|
>>> other.execute()
|
|
297
296
|
revenue
|
|
@@ -306,6 +305,31 @@ A False False
|
|
|
306
305
|
B False False
|
|
307
306
|
C False True
|
|
308
307
|
D False False
|
|
308
|
+
|
|
309
|
+
Compare to a MultiIndex by level.
|
|
310
|
+
|
|
311
|
+
>>> df_multindex = md.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
|
|
312
|
+
... 'revenue': [100, 250, 300, 200, 175, 225]}},
|
|
313
|
+
... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
|
|
314
|
+
... ['A', 'B', 'C', 'A', 'B', 'C']])
|
|
315
|
+
>>> df_multindex.execute()
|
|
316
|
+
cost revenue
|
|
317
|
+
Q1 A 250 100
|
|
318
|
+
B 150 250
|
|
319
|
+
C 100 300
|
|
320
|
+
Q2 A 150 200
|
|
321
|
+
B 300 175
|
|
322
|
+
C 220 225
|
|
323
|
+
|
|
324
|
+
>>> df.le(df_multindex, level=1).execute()
|
|
325
|
+
cost revenue
|
|
326
|
+
Q1 A True True
|
|
327
|
+
B True True
|
|
328
|
+
C True True
|
|
329
|
+
Q2 A False True
|
|
330
|
+
B True False
|
|
331
|
+
C True False
|
|
332
|
+
|
|
309
333
|
"""
|
|
310
334
|
|
|
311
335
|
|
|
@@ -51,6 +51,8 @@ dtype: bool
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
@bin_compare_doc("Equal to", equiv="==", series_example=_eq_example)
|
|
54
|
-
def eq(df, other, axis="columns", level=None):
|
|
55
|
-
op = DataFrameEqual(
|
|
54
|
+
def eq(df, other, axis="columns", level=None, fill_value=None):
|
|
55
|
+
op = DataFrameEqual(
|
|
56
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
57
|
+
)
|
|
56
58
|
return op(df, other)
|
|
@@ -52,6 +52,8 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Greater than", equiv=">", series_example=_gt_example)
|
|
55
|
-
def gt(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameGreater(
|
|
55
|
+
def gt(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameGreater(
|
|
57
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
58
|
+
)
|
|
57
59
|
return op(df, other)
|
|
@@ -52,6 +52,8 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Greater than or equal to", equiv=">=", series_example=_ge_example)
|
|
55
|
-
def ge(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameGreaterEqual(
|
|
55
|
+
def ge(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameGreaterEqual(
|
|
57
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
58
|
+
)
|
|
57
59
|
return op(df, other)
|
|
@@ -52,6 +52,6 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Less than", equiv="<", series_example=_lt_example)
|
|
55
|
-
def lt(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other)
|
|
55
|
+
def lt(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value)
|
|
57
57
|
return op(df, other)
|
|
@@ -52,6 +52,8 @@ dtype: bool
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
@bin_compare_doc("Less than or equal to", equiv="<=", series_example=_le_example)
|
|
55
|
-
def le(df, other, axis="columns", level=None):
|
|
56
|
-
op = DataFrameLessEqual(
|
|
55
|
+
def le(df, other, axis="columns", level=None, fill_value=None):
|
|
56
|
+
op = DataFrameLessEqual(
|
|
57
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
58
|
+
)
|
|
57
59
|
return op(df, other)
|
|
@@ -51,6 +51,8 @@ dtype: bool
|
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
@bin_compare_doc("Not equal to", equiv="!=", series_example=_ne_example)
|
|
54
|
-
def ne(df, other, axis="columns", level=None):
|
|
55
|
-
op = DataFrameNotEqual(
|
|
54
|
+
def ne(df, other, axis="columns", level=None, fill_value=None):
|
|
55
|
+
op = DataFrameNotEqual(
|
|
56
|
+
axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
|
|
57
|
+
)
|
|
56
58
|
return op(df, other)
|
|
@@ -22,6 +22,7 @@ import pandas as pd
|
|
|
22
22
|
import pytest
|
|
23
23
|
|
|
24
24
|
from ....core import OperatorType
|
|
25
|
+
from ....tests.utils import assert_mf_index_dtype
|
|
25
26
|
from ....utils import dataslots
|
|
26
27
|
from ...core import IndexValue
|
|
27
28
|
from ...datasource.dataframe import from_pandas
|
|
@@ -164,7 +165,7 @@ def test_without_shuffle(func_name, func_opts):
|
|
|
164
165
|
pd.testing.assert_index_equal(
|
|
165
166
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
166
167
|
)
|
|
167
|
-
|
|
168
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
168
169
|
pd.testing.assert_index_equal(
|
|
169
170
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
170
171
|
)
|
|
@@ -176,7 +177,7 @@ def test_without_shuffle(func_name, func_opts):
|
|
|
176
177
|
pd.testing.assert_index_equal(
|
|
177
178
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
178
179
|
)
|
|
179
|
-
|
|
180
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
180
181
|
pd.testing.assert_index_equal(
|
|
181
182
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
182
183
|
)
|
|
@@ -370,7 +371,7 @@ def test_with_one_shuffle(func_name, func_opts):
|
|
|
370
371
|
pd.testing.assert_index_equal(
|
|
371
372
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
372
373
|
)
|
|
373
|
-
|
|
374
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
374
375
|
pd.testing.assert_index_equal(
|
|
375
376
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
376
377
|
)
|
|
@@ -403,7 +404,7 @@ def test_with_all_shuffle(func_name, func_opts):
|
|
|
403
404
|
pd.testing.assert_index_equal(
|
|
404
405
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
405
406
|
)
|
|
406
|
-
|
|
407
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
407
408
|
pd.testing.assert_index_equal(
|
|
408
409
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
409
410
|
)
|
|
@@ -433,7 +434,7 @@ def test_with_all_shuffle(func_name, func_opts):
|
|
|
433
434
|
pd.testing.assert_index_equal(
|
|
434
435
|
df6.columns_value.to_pandas(), func_opts.func(data4, data5).columns
|
|
435
436
|
)
|
|
436
|
-
|
|
437
|
+
assert_mf_index_dtype(df6.index_value.value, np.int64)
|
|
437
438
|
pd.testing.assert_index_equal(
|
|
438
439
|
df6.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
439
440
|
)
|
|
@@ -468,7 +469,7 @@ def test_without_shuffle_and_with_one_chunk(func_name, func_opts):
|
|
|
468
469
|
pd.testing.assert_index_equal(
|
|
469
470
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
470
471
|
)
|
|
471
|
-
|
|
472
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
472
473
|
pd.testing.assert_index_equal(
|
|
473
474
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
474
475
|
)
|
|
@@ -501,7 +502,7 @@ def test_both_one_chunk(func_name, func_opts):
|
|
|
501
502
|
pd.testing.assert_index_equal(
|
|
502
503
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
503
504
|
)
|
|
504
|
-
|
|
505
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
505
506
|
pd.testing.assert_index_equal(
|
|
506
507
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
507
508
|
)
|
|
@@ -534,7 +535,7 @@ def test_with_shuffle_and_one_chunk(func_name, func_opts):
|
|
|
534
535
|
pd.testing.assert_index_equal(
|
|
535
536
|
df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
|
|
536
537
|
)
|
|
537
|
-
|
|
538
|
+
assert_mf_index_dtype(df3.index_value.value, np.int64)
|
|
538
539
|
pd.testing.assert_index_equal(
|
|
539
540
|
df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
540
541
|
)
|
|
@@ -558,7 +559,7 @@ def test_on_same_dataframe(func_name, func_opts):
|
|
|
558
559
|
pd.testing.assert_index_equal(
|
|
559
560
|
df2.columns_value.to_pandas(), func_opts.func(data, data).columns
|
|
560
561
|
)
|
|
561
|
-
|
|
562
|
+
assert_mf_index_dtype(df2.index_value.value, np.int64)
|
|
562
563
|
pd.testing.assert_index_equal(
|
|
563
564
|
df2.index_value.to_pandas(), pd.Index([], dtype=np.int64)
|
|
564
565
|
)
|
|
@@ -590,19 +591,19 @@ def test_dataframe_and_scalar(func_name, func_opts):
|
|
|
590
591
|
pd.testing.assert_series_equal(result.dtypes, expected.dtypes)
|
|
591
592
|
|
|
592
593
|
pd.testing.assert_index_equal(result.columns_value.to_pandas(), data.columns)
|
|
593
|
-
|
|
594
|
+
assert_mf_index_dtype(result.index_value.value, np.int64)
|
|
594
595
|
|
|
595
596
|
pd.testing.assert_index_equal(result2.columns_value.to_pandas(), data.columns)
|
|
596
|
-
|
|
597
|
+
assert_mf_index_dtype(result2.index_value.value, np.int64)
|
|
597
598
|
|
|
598
599
|
pd.testing.assert_index_equal(result3.columns_value.to_pandas(), data.columns)
|
|
599
|
-
|
|
600
|
+
assert_mf_index_dtype(result3.index_value.value, np.int64)
|
|
600
601
|
|
|
601
602
|
pd.testing.assert_index_equal(result4.columns_value.to_pandas(), data.columns)
|
|
602
|
-
|
|
603
|
+
assert_mf_index_dtype(result4.index_value.value, np.int64)
|
|
603
604
|
|
|
604
605
|
pd.testing.assert_index_equal(result5.columns_value.to_pandas(), data.columns)
|
|
605
|
-
|
|
606
|
+
assert_mf_index_dtype(result5.index_value.value, np.int64)
|
|
606
607
|
|
|
607
608
|
if "builtin_function_or_method" not in str(type(func_opts.func)):
|
|
608
609
|
# skip NotImplemented test for comparison function
|
|
@@ -679,7 +680,7 @@ def test_abs():
|
|
|
679
680
|
pd.testing.assert_index_equal(
|
|
680
681
|
df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
|
|
681
682
|
)
|
|
682
|
-
|
|
683
|
+
assert_mf_index_dtype(df2.index_value.value, np.int64)
|
|
683
684
|
assert df2.shape == (10, 10)
|
|
684
685
|
|
|
685
686
|
|
|
@@ -697,7 +698,7 @@ def test_not():
|
|
|
697
698
|
pd.testing.assert_index_equal(
|
|
698
699
|
df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
|
|
699
700
|
)
|
|
700
|
-
|
|
701
|
+
assert_mf_index_dtype(df2.index_value.value, np.int64)
|
|
701
702
|
assert df2.shape == (10, 10)
|
|
702
703
|
|
|
703
704
|
|
maxframe/dataframe/core.py
CHANGED
|
@@ -142,6 +142,14 @@ class IndexValue(Serializable):
|
|
|
142
142
|
_data = NDArrayField("data")
|
|
143
143
|
_dtype = DataTypeField("dtype")
|
|
144
144
|
|
|
145
|
+
@property
|
|
146
|
+
def dtype(self):
|
|
147
|
+
return getattr(self, "_dtype", None)
|
|
148
|
+
|
|
149
|
+
@property
|
|
150
|
+
def inferred_type(self):
|
|
151
|
+
return "floating" if self.dtype.kind == "f" else "integer"
|
|
152
|
+
|
|
145
153
|
class RangeIndex(IndexBase):
|
|
146
154
|
_name = AnyField("name")
|
|
147
155
|
_slice = SliceField("slice")
|
|
@@ -243,6 +251,10 @@ class IndexValue(Serializable):
|
|
|
243
251
|
_data = NDArrayField("data")
|
|
244
252
|
_dtype = DataTypeField("dtype")
|
|
245
253
|
|
|
254
|
+
@property
|
|
255
|
+
def dtype(self):
|
|
256
|
+
return getattr(self, "_dtype", None)
|
|
257
|
+
|
|
246
258
|
@property
|
|
247
259
|
def inferred_type(self):
|
|
248
260
|
return "integer"
|
|
@@ -254,6 +266,10 @@ class IndexValue(Serializable):
|
|
|
254
266
|
_data = NDArrayField("data")
|
|
255
267
|
_dtype = DataTypeField("dtype")
|
|
256
268
|
|
|
269
|
+
@property
|
|
270
|
+
def dtype(self):
|
|
271
|
+
return getattr(self, "_dtype", None)
|
|
272
|
+
|
|
257
273
|
@property
|
|
258
274
|
def inferred_type(self):
|
|
259
275
|
return "integer"
|
|
@@ -265,6 +281,10 @@ class IndexValue(Serializable):
|
|
|
265
281
|
_data = NDArrayField("data")
|
|
266
282
|
_dtype = DataTypeField("dtype")
|
|
267
283
|
|
|
284
|
+
@property
|
|
285
|
+
def dtype(self):
|
|
286
|
+
return getattr(self, "_dtype", None)
|
|
287
|
+
|
|
268
288
|
@property
|
|
269
289
|
def inferred_type(self):
|
|
270
290
|
return "floating"
|
|
@@ -1514,8 +1534,7 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1514
1534
|
refresh_index_value(self)
|
|
1515
1535
|
refresh_dtypes(self)
|
|
1516
1536
|
|
|
1517
|
-
def
|
|
1518
|
-
dtypes = table_meta.pd_column_dtypes
|
|
1537
|
+
def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
|
|
1519
1538
|
self._dtypes = dtypes
|
|
1520
1539
|
self._columns_value = parse_index(dtypes.index, store_data=True)
|
|
1521
1540
|
self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
|
|
@@ -1523,6 +1542,9 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
|
|
|
1523
1542
|
new_shape[-1] = len(dtypes)
|
|
1524
1543
|
self._shape = tuple(new_shape)
|
|
1525
1544
|
|
|
1545
|
+
def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
|
|
1546
|
+
self.refresh_from_dtypes(table_meta.pd_column_dtypes)
|
|
1547
|
+
|
|
1526
1548
|
@property
|
|
1527
1549
|
def dtypes(self):
|
|
1528
1550
|
dt = getattr(self, "_dtypes", None)
|
|
@@ -1666,6 +1688,8 @@ class DataFrameData(_BatchedFetcher, BaseDataFrameData):
|
|
|
1666
1688
|
raise NotImplementedError
|
|
1667
1689
|
|
|
1668
1690
|
corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
|
|
1691
|
+
if corner_data is None:
|
|
1692
|
+
return
|
|
1669
1693
|
|
|
1670
1694
|
buf = StringIO()
|
|
1671
1695
|
max_rows = pd.get_option("display.max_rows")
|