maxframe 1.0.0rc3__cp39-cp39-macosx_10_9_universal2.whl → 1.1.0__cp39-cp39-macosx_10_9_universal2.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (112)
  1. maxframe/_utils.cpython-39-darwin.so +0 -0
  2. maxframe/codegen.py +1 -0
  3. maxframe/config/config.py +16 -1
  4. maxframe/conftest.py +52 -14
  5. maxframe/core/entity/executable.py +1 -1
  6. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  7. maxframe/core/operator/base.py +2 -0
  8. maxframe/dataframe/arithmetic/docstring.py +26 -2
  9. maxframe/dataframe/arithmetic/equal.py +4 -2
  10. maxframe/dataframe/arithmetic/greater.py +4 -2
  11. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  12. maxframe/dataframe/arithmetic/less.py +2 -2
  13. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  14. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  15. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +17 -16
  16. maxframe/dataframe/core.py +26 -2
  17. maxframe/dataframe/datasource/read_odps_query.py +116 -28
  18. maxframe/dataframe/datasource/read_odps_table.py +3 -1
  19. maxframe/dataframe/datasource/tests/test_datasource.py +93 -12
  20. maxframe/dataframe/datastore/to_odps.py +7 -0
  21. maxframe/dataframe/extensions/__init__.py +8 -0
  22. maxframe/dataframe/extensions/apply_chunk.py +649 -0
  23. maxframe/dataframe/extensions/flatjson.py +131 -0
  24. maxframe/dataframe/extensions/flatmap.py +314 -0
  25. maxframe/dataframe/extensions/reshuffle.py +1 -1
  26. maxframe/dataframe/extensions/tests/test_apply_chunk.py +186 -0
  27. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  28. maxframe/dataframe/groupby/__init__.py +1 -0
  29. maxframe/dataframe/groupby/aggregation.py +1 -0
  30. maxframe/dataframe/groupby/apply.py +9 -1
  31. maxframe/dataframe/groupby/core.py +1 -1
  32. maxframe/dataframe/groupby/fill.py +4 -1
  33. maxframe/dataframe/groupby/getitem.py +6 -0
  34. maxframe/dataframe/groupby/tests/test_groupby.py +1 -1
  35. maxframe/dataframe/groupby/transform.py +8 -2
  36. maxframe/dataframe/indexing/add_prefix_suffix.py +1 -1
  37. maxframe/dataframe/indexing/loc.py +6 -4
  38. maxframe/dataframe/indexing/rename.py +11 -0
  39. maxframe/dataframe/initializer.py +11 -1
  40. maxframe/dataframe/merge/__init__.py +9 -1
  41. maxframe/dataframe/merge/concat.py +41 -31
  42. maxframe/dataframe/merge/merge.py +1 -1
  43. maxframe/dataframe/merge/tests/test_merge.py +3 -1
  44. maxframe/dataframe/misc/apply.py +3 -0
  45. maxframe/dataframe/misc/drop_duplicates.py +23 -2
  46. maxframe/dataframe/misc/map.py +3 -1
  47. maxframe/dataframe/misc/tests/test_misc.py +24 -2
  48. maxframe/dataframe/misc/transform.py +22 -13
  49. maxframe/dataframe/reduction/__init__.py +3 -0
  50. maxframe/dataframe/reduction/aggregation.py +1 -0
  51. maxframe/dataframe/reduction/median.py +56 -0
  52. maxframe/dataframe/reduction/tests/test_reduction.py +17 -7
  53. maxframe/dataframe/statistics/quantile.py +8 -2
  54. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  55. maxframe/dataframe/tests/test_initializer.py +33 -2
  56. maxframe/dataframe/tests/test_utils.py +60 -0
  57. maxframe/dataframe/utils.py +110 -7
  58. maxframe/dataframe/window/expanding.py +5 -3
  59. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  60. maxframe/io/objects/tests/test_object_io.py +39 -12
  61. maxframe/io/odpsio/arrow.py +30 -2
  62. maxframe/io/odpsio/schema.py +28 -8
  63. maxframe/io/odpsio/tableio.py +55 -133
  64. maxframe/io/odpsio/tests/test_schema.py +40 -4
  65. maxframe/io/odpsio/tests/test_tableio.py +5 -5
  66. maxframe/io/odpsio/tests/test_volumeio.py +35 -11
  67. maxframe/io/odpsio/volumeio.py +36 -6
  68. maxframe/learn/contrib/__init__.py +3 -1
  69. maxframe/learn/contrib/graph/__init__.py +15 -0
  70. maxframe/learn/contrib/graph/connected_components.py +215 -0
  71. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  72. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  73. maxframe/learn/contrib/llm/__init__.py +16 -0
  74. maxframe/learn/contrib/llm/core.py +54 -0
  75. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  76. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  77. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  78. maxframe/learn/contrib/llm/text.py +42 -0
  79. maxframe/learn/contrib/xgboost/classifier.py +3 -3
  80. maxframe/learn/contrib/xgboost/predict.py +8 -39
  81. maxframe/learn/contrib/xgboost/train.py +4 -3
  82. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  83. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  84. maxframe/opcodes.py +10 -1
  85. maxframe/protocol.py +6 -1
  86. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  87. maxframe/serialization/core.pyx +13 -1
  88. maxframe/serialization/pandas.py +50 -20
  89. maxframe/serialization/serializables/core.py +24 -5
  90. maxframe/serialization/serializables/field_type.py +4 -1
  91. maxframe/serialization/serializables/tests/test_serializable.py +8 -1
  92. maxframe/serialization/tests/test_serial.py +2 -1
  93. maxframe/session.py +9 -2
  94. maxframe/tensor/__init__.py +19 -7
  95. maxframe/tensor/indexing/getitem.py +2 -0
  96. maxframe/tensor/merge/concatenate.py +23 -20
  97. maxframe/tensor/merge/vstack.py +5 -1
  98. maxframe/tensor/misc/transpose.py +1 -1
  99. maxframe/tests/utils.py +16 -0
  100. maxframe/udf.py +27 -0
  101. maxframe/utils.py +64 -14
  102. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/METADATA +2 -2
  103. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/RECORD +112 -96
  104. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/WHEEL +1 -1
  105. maxframe_client/clients/framedriver.py +4 -1
  106. maxframe_client/fetcher.py +28 -10
  107. maxframe_client/session/consts.py +3 -0
  108. maxframe_client/session/odps.py +104 -20
  109. maxframe_client/session/task.py +42 -26
  110. maxframe_client/session/tests/test_task.py +0 -4
  111. maxframe_client/tests/test_session.py +44 -12
  112. {maxframe-1.0.0rc3.dist-info → maxframe-1.1.0.dist-info}/top_level.txt +0 -0
maxframe/_utils.cpython-39-darwin.so CHANGED
Binary file
maxframe/codegen.py CHANGED
@@ -347,6 +347,7 @@ BUILTIN_ENGINE_SPE = "SPE"
347
347
  BUILTIN_ENGINE_MCSQL = "MCSQL"
348
348
 
349
349
  FAST_RANGE_INDEX_ENABLED = "codegen.fast_range_index_enabled"
350
+ ROW_NUMBER_WINDOW_INDEX_ENABLED = "codegen.row_number_window_index_enabled"
350
351
 
351
352
 
352
353
  class BigDagCodeGenerator(metaclass=abc.ABCMeta):
maxframe/config/config.py CHANGED
@@ -343,6 +343,9 @@ default_options.register_option("sql.enable_mcqa", True, validator=is_bool, remo
343
343
  default_options.register_option(
344
344
  "sql.generate_comments", True, validator=is_bool, remote=True
345
345
  )
346
+ default_options.register_option(
347
+ "sql.auto_use_common_image", True, validator=is_bool, remote=True
348
+ )
346
349
  default_options.register_option("sql.settings", {}, validator=is_dict, remote=True)
347
350
 
348
351
  default_options.register_option("is_production", False, validator=is_bool, remote=True)
@@ -371,13 +374,25 @@ default_options.register_option(
371
374
  validator=is_numeric,
372
375
  remote=True,
373
376
  )
377
+ default_options.register_option(
378
+ "session.quota_name", None, validator=is_null | is_string, remote=True
379
+ )
380
+ default_options.register_option(
381
+ "session.enable_schema", None, validator=is_null | is_bool, remote=True
382
+ )
383
+ default_options.register_option(
384
+ "session.enable_high_availability", None, validator=is_null | is_bool, remote=True
385
+ )
386
+ default_options.register_option(
387
+ "session.default_schema", None, validator=is_null | is_string, remote=True
388
+ )
374
389
  default_options.register_option(
375
390
  "session.upload_batch_size",
376
391
  _DEFAULT_UPLOAD_BATCH_SIZE,
377
392
  validator=is_integer,
378
393
  )
379
394
  default_options.register_option(
380
- "session.table_lifecycle", None, validator=is_null | is_integer
395
+ "session.table_lifecycle", None, validator=is_null | is_integer, remote=True
381
396
  )
382
397
  default_options.register_option(
383
398
  "session.temp_table_lifecycle",
maxframe/conftest.py CHANGED
@@ -14,10 +14,11 @@
14
14
 
15
15
  import faulthandler
16
16
  import os
17
- from configparser import ConfigParser, NoOptionError
17
+ from configparser import ConfigParser, NoOptionError, NoSectionError
18
18
 
19
19
  import pytest
20
20
  from odps import ODPS
21
+ from odps.accounts import BearerTokenAccount
21
22
 
22
23
  from .config import options
23
24
 
@@ -34,12 +35,23 @@ def test_config():
34
35
  return config
35
36
 
36
37
 
37
- @pytest.fixture(scope="session", autouse=True)
38
- def odps_envs(test_config):
39
- access_id = test_config.get("odps", "access_id")
40
- secret_access_key = test_config.get("odps", "secret_access_key")
41
- project = test_config.get("odps", "project")
42
- endpoint = test_config.get("odps", "endpoint")
38
+ def _get_odps_env(test_config: ConfigParser, section_name: str) -> ODPS:
39
+ try:
40
+ access_id = test_config.get(section_name, "access_id")
41
+ except NoOptionError:
42
+ access_id = test_config.get("odps", "access_id")
43
+ try:
44
+ secret_access_key = test_config.get(section_name, "secret_access_key")
45
+ except NoOptionError:
46
+ secret_access_key = test_config.get("odps", "secret_access_key")
47
+ try:
48
+ project = test_config.get(section_name, "project")
49
+ except NoOptionError:
50
+ project = test_config.get("odps", "project")
51
+ try:
52
+ endpoint = test_config.get(section_name, "endpoint")
53
+ except NoOptionError:
54
+ endpoint = test_config.get("odps", "endpoint")
43
55
  try:
44
56
  tunnel_endpoint = test_config.get("odps", "tunnel_endpoint")
45
57
  except NoOptionError:
@@ -55,12 +67,31 @@ def odps_envs(test_config):
55
67
  ],
56
68
  }
57
69
  token = entry.get_project().generate_auth_token(policy, "bearer", 5)
70
+ return ODPS(
71
+ account=BearerTokenAccount(token, 5),
72
+ project=project,
73
+ endpoint=endpoint,
74
+ tunnel_endpoint=tunnel_endpoint,
75
+ )
76
+
77
+
78
+ @pytest.fixture(scope="session")
79
+ def odps_with_schema(test_config):
80
+ try:
81
+ return _get_odps_env(test_config, "odps_with_schema")
82
+ except NoSectionError:
83
+ pytest.skip("Need to specify odps_with_schema section in test.conf")
84
+
85
+
86
+ @pytest.fixture(scope="session", autouse=True)
87
+ def odps_envs(test_config):
88
+ entry = _get_odps_env(test_config, "odps")
58
89
 
59
- os.environ["ODPS_BEARER_TOKEN"] = token
60
- os.environ["ODPS_PROJECT_NAME"] = project
61
- os.environ["ODPS_ENDPOINT"] = endpoint
62
- if tunnel_endpoint:
63
- os.environ["ODPS_TUNNEL_ENDPOINT"] = tunnel_endpoint
90
+ os.environ["ODPS_BEARER_TOKEN"] = entry.account.token
91
+ os.environ["ODPS_PROJECT_NAME"] = entry.project
92
+ os.environ["ODPS_ENDPOINT"] = entry.endpoint
93
+ if entry.tunnel_endpoint:
94
+ os.environ["ODPS_TUNNEL_ENDPOINT"] = entry.tunnel_endpoint
64
95
 
65
96
  try:
66
97
  yield
@@ -95,7 +126,14 @@ def oss_config():
95
126
  oss_rolearn = config.get("oss", "rolearn")
96
127
 
97
128
  options.service_role_arn = oss_rolearn
98
- options.object_cache_url = f"oss://{oss_endpoint}/{oss_bucket_name}"
129
+ if "test" in oss_endpoint:
130
+ oss_svc_endpoint = oss_endpoint
131
+ else:
132
+ endpoint_parts = oss_endpoint.split(".", 1)
133
+ if "-internal" not in endpoint_parts[0]:
134
+ endpoint_parts[0] += "-internal"
135
+ oss_svc_endpoint = ".".join(endpoint_parts)
136
+ options.object_cache_url = f"oss://{oss_svc_endpoint}/{oss_bucket_name}"
99
137
 
100
138
  config.oss_config = (
101
139
  oss_access_id,
@@ -110,7 +148,7 @@ def oss_config():
110
148
  config.oss_bucket = oss2.Bucket(auth, oss_endpoint, oss_bucket_name)
111
149
  config.oss_rolearn = oss_rolearn
112
150
  yield config
113
- except (ConfigParser.NoSectionError, ConfigParser.NoOptionError, ImportError):
151
+ except (NoSectionError, NoOptionError, ImportError):
114
152
  return None
115
153
  finally:
116
154
  options.service_role_arn = old_role_arn
maxframe/core/entity/executable.py CHANGED
@@ -46,7 +46,7 @@ class DecrefRunner:
46
46
  break
47
47
 
48
48
  session = session_ref()
49
- if session is None:
49
+ if session is None or session.closed:
50
50
  fut.set_result(None)
51
51
  continue
52
52
  try:
maxframe/core/operator/base.py CHANGED
@@ -86,6 +86,8 @@ class SchedulingHint(Serializable):
86
86
  # `gpu` indicates that if the operator should be executed on the GPU.
87
87
  gpu = BoolField("gpu", default=None)
88
88
  priority = Int32Field("priority", default=None)
89
+ expect_engine = StringField("expect_engine", default=None)
90
+ expect_resources = DictField("expect_resources", FieldTypes.string, default=None)
89
91
 
90
92
  @classproperty
91
93
  @lru_cache(1)
maxframe/dataframe/arithmetic/docstring.py CHANGED
@@ -185,7 +185,6 @@ e NaN
185
185
  dtype: float64
186
186
  """
187
187
 
188
- # FIXME: https://github.com/aliyun/alibabacloud-odps-maxframe-client/issues/48
189
188
  _flex_comp_doc_FRAME = """
190
189
  Get {desc} of dataframe and other, element-wise (binary operator `{op_name}`).
191
190
  Among flexible wrappers (`eq`, `ne`, `le`, `lt`, `ge`, `gt`) to comparison
@@ -291,7 +290,7 @@ C True False
291
290
 
292
291
  Compare to a DataFrame of different shape.
293
292
 
294
- >>> other = pd.DataFrame({{'revenue': [300, 250, 100, 150]}},
293
+ >>> other = md.DataFrame({{'revenue': [300, 250, 100, 150]}},
295
294
  ... index=['A', 'B', 'C', 'D'])
296
295
  >>> other.execute()
297
296
  revenue
@@ -306,6 +305,31 @@ A False False
306
305
  B False False
307
306
  C False True
308
307
  D False False
308
+
309
+ Compare to a MultiIndex by level.
310
+
311
+ >>> df_multindex = md.DataFrame({{'cost': [250, 150, 100, 150, 300, 220],
312
+ ... 'revenue': [100, 250, 300, 200, 175, 225]}},
313
+ ... index=[['Q1', 'Q1', 'Q1', 'Q2', 'Q2', 'Q2'],
314
+ ... ['A', 'B', 'C', 'A', 'B', 'C']])
315
+ >>> df_multindex.execute()
316
+ cost revenue
317
+ Q1 A 250 100
318
+ B 150 250
319
+ C 100 300
320
+ Q2 A 150 200
321
+ B 300 175
322
+ C 220 225
323
+
324
+ >>> df.le(df_multindex, level=1).execute()
325
+ cost revenue
326
+ Q1 A True True
327
+ B True True
328
+ C True True
329
+ Q2 A False True
330
+ B True False
331
+ C True False
332
+
309
333
  """
310
334
 
311
335
 
maxframe/dataframe/arithmetic/equal.py CHANGED
@@ -51,6 +51,8 @@ dtype: bool
51
51
 
52
52
 
53
53
  @bin_compare_doc("Equal to", equiv="==", series_example=_eq_example)
54
- def eq(df, other, axis="columns", level=None):
55
- op = DataFrameEqual(axis=axis, level=level, lhs=df, rhs=other)
54
+ def eq(df, other, axis="columns", level=None, fill_value=None):
55
+ op = DataFrameEqual(
56
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
57
+ )
56
58
  return op(df, other)
maxframe/dataframe/arithmetic/greater.py CHANGED
@@ -52,6 +52,8 @@ dtype: bool
52
52
 
53
53
 
54
54
  @bin_compare_doc("Greater than", equiv=">", series_example=_gt_example)
55
- def gt(df, other, axis="columns", level=None):
56
- op = DataFrameGreater(axis=axis, level=level, lhs=df, rhs=other)
55
+ def gt(df, other, axis="columns", level=None, fill_value=None):
56
+ op = DataFrameGreater(
57
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
58
+ )
57
59
  return op(df, other)
maxframe/dataframe/arithmetic/greater_equal.py CHANGED
@@ -52,6 +52,8 @@ dtype: bool
52
52
 
53
53
 
54
54
  @bin_compare_doc("Greater than or equal to", equiv=">=", series_example=_ge_example)
55
- def ge(df, other, axis="columns", level=None):
56
- op = DataFrameGreaterEqual(axis=axis, level=level, lhs=df, rhs=other)
55
+ def ge(df, other, axis="columns", level=None, fill_value=None):
56
+ op = DataFrameGreaterEqual(
57
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
58
+ )
57
59
  return op(df, other)
maxframe/dataframe/arithmetic/less.py CHANGED
@@ -52,6 +52,6 @@ dtype: bool
52
52
 
53
53
 
54
54
  @bin_compare_doc("Less than", equiv="<", series_example=_lt_example)
55
- def lt(df, other, axis="columns", level=None):
56
- op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other)
55
+ def lt(df, other, axis="columns", level=None, fill_value=None):
56
+ op = DataFrameLess(axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value)
57
57
  return op(df, other)
maxframe/dataframe/arithmetic/less_equal.py CHANGED
@@ -52,6 +52,8 @@ dtype: bool
52
52
 
53
53
 
54
54
  @bin_compare_doc("Less than or equal to", equiv="<=", series_example=_le_example)
55
- def le(df, other, axis="columns", level=None):
56
- op = DataFrameLessEqual(axis=axis, level=level, lhs=df, rhs=other)
55
+ def le(df, other, axis="columns", level=None, fill_value=None):
56
+ op = DataFrameLessEqual(
57
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
58
+ )
57
59
  return op(df, other)
maxframe/dataframe/arithmetic/not_equal.py CHANGED
@@ -51,6 +51,8 @@ dtype: bool
51
51
 
52
52
 
53
53
  @bin_compare_doc("Not equal to", equiv="!=", series_example=_ne_example)
54
- def ne(df, other, axis="columns", level=None):
55
- op = DataFrameNotEqual(axis=axis, level=level, lhs=df, rhs=other)
54
+ def ne(df, other, axis="columns", level=None, fill_value=None):
55
+ op = DataFrameNotEqual(
56
+ axis=axis, level=level, lhs=df, rhs=other, fill_value=fill_value
57
+ )
56
58
  return op(df, other)
maxframe/dataframe/arithmetic/tests/test_arithmetic.py CHANGED
@@ -22,6 +22,7 @@ import pandas as pd
22
22
  import pytest
23
23
 
24
24
  from ....core import OperatorType
25
+ from ....tests.utils import assert_mf_index_dtype
25
26
  from ....utils import dataslots
26
27
  from ...core import IndexValue
27
28
  from ...datasource.dataframe import from_pandas
@@ -164,7 +165,7 @@ def test_without_shuffle(func_name, func_opts):
164
165
  pd.testing.assert_index_equal(
165
166
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
166
167
  )
167
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
168
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
168
169
  pd.testing.assert_index_equal(
169
170
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
170
171
  )
@@ -176,7 +177,7 @@ def test_without_shuffle(func_name, func_opts):
176
177
  pd.testing.assert_index_equal(
177
178
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
178
179
  )
179
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
180
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
180
181
  pd.testing.assert_index_equal(
181
182
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
182
183
  )
@@ -370,7 +371,7 @@ def test_with_one_shuffle(func_name, func_opts):
370
371
  pd.testing.assert_index_equal(
371
372
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
372
373
  )
373
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
374
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
374
375
  pd.testing.assert_index_equal(
375
376
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
376
377
  )
@@ -403,7 +404,7 @@ def test_with_all_shuffle(func_name, func_opts):
403
404
  pd.testing.assert_index_equal(
404
405
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
405
406
  )
406
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
407
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
407
408
  pd.testing.assert_index_equal(
408
409
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
409
410
  )
@@ -433,7 +434,7 @@ def test_with_all_shuffle(func_name, func_opts):
433
434
  pd.testing.assert_index_equal(
434
435
  df6.columns_value.to_pandas(), func_opts.func(data4, data5).columns
435
436
  )
436
- assert isinstance(df6.index_value.value, IndexValue.Int64Index)
437
+ assert_mf_index_dtype(df6.index_value.value, np.int64)
437
438
  pd.testing.assert_index_equal(
438
439
  df6.index_value.to_pandas(), pd.Index([], dtype=np.int64)
439
440
  )
@@ -468,7 +469,7 @@ def test_without_shuffle_and_with_one_chunk(func_name, func_opts):
468
469
  pd.testing.assert_index_equal(
469
470
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
470
471
  )
471
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
472
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
472
473
  pd.testing.assert_index_equal(
473
474
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
474
475
  )
@@ -501,7 +502,7 @@ def test_both_one_chunk(func_name, func_opts):
501
502
  pd.testing.assert_index_equal(
502
503
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
503
504
  )
504
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
505
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
505
506
  pd.testing.assert_index_equal(
506
507
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
507
508
  )
@@ -534,7 +535,7 @@ def test_with_shuffle_and_one_chunk(func_name, func_opts):
534
535
  pd.testing.assert_index_equal(
535
536
  df3.columns_value.to_pandas(), func_opts.func(data1, data2).columns
536
537
  )
537
- assert isinstance(df3.index_value.value, IndexValue.Int64Index)
538
+ assert_mf_index_dtype(df3.index_value.value, np.int64)
538
539
  pd.testing.assert_index_equal(
539
540
  df3.index_value.to_pandas(), pd.Index([], dtype=np.int64)
540
541
  )
@@ -558,7 +559,7 @@ def test_on_same_dataframe(func_name, func_opts):
558
559
  pd.testing.assert_index_equal(
559
560
  df2.columns_value.to_pandas(), func_opts.func(data, data).columns
560
561
  )
561
- assert isinstance(df2.index_value.value, IndexValue.Int64Index)
562
+ assert_mf_index_dtype(df2.index_value.value, np.int64)
562
563
  pd.testing.assert_index_equal(
563
564
  df2.index_value.to_pandas(), pd.Index([], dtype=np.int64)
564
565
  )
@@ -590,19 +591,19 @@ def test_dataframe_and_scalar(func_name, func_opts):
590
591
  pd.testing.assert_series_equal(result.dtypes, expected.dtypes)
591
592
 
592
593
  pd.testing.assert_index_equal(result.columns_value.to_pandas(), data.columns)
593
- assert isinstance(result.index_value.value, IndexValue.Int64Index)
594
+ assert_mf_index_dtype(result.index_value.value, np.int64)
594
595
 
595
596
  pd.testing.assert_index_equal(result2.columns_value.to_pandas(), data.columns)
596
- assert isinstance(result2.index_value.value, IndexValue.Int64Index)
597
+ assert_mf_index_dtype(result2.index_value.value, np.int64)
597
598
 
598
599
  pd.testing.assert_index_equal(result3.columns_value.to_pandas(), data.columns)
599
- assert isinstance(result3.index_value.value, IndexValue.Int64Index)
600
+ assert_mf_index_dtype(result3.index_value.value, np.int64)
600
601
 
601
602
  pd.testing.assert_index_equal(result4.columns_value.to_pandas(), data.columns)
602
- assert isinstance(result4.index_value.value, IndexValue.Int64Index)
603
+ assert_mf_index_dtype(result4.index_value.value, np.int64)
603
604
 
604
605
  pd.testing.assert_index_equal(result5.columns_value.to_pandas(), data.columns)
605
- assert isinstance(result5.index_value.value, IndexValue.Int64Index)
606
+ assert_mf_index_dtype(result5.index_value.value, np.int64)
606
607
 
607
608
  if "builtin_function_or_method" not in str(type(func_opts.func)):
608
609
  # skip NotImplemented test for comparison function
@@ -679,7 +680,7 @@ def test_abs():
679
680
  pd.testing.assert_index_equal(
680
681
  df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
681
682
  )
682
- assert isinstance(df2.index_value.value, IndexValue.Int64Index)
683
+ assert_mf_index_dtype(df2.index_value.value, np.int64)
683
684
  assert df2.shape == (10, 10)
684
685
 
685
686
 
@@ -697,7 +698,7 @@ def test_not():
697
698
  pd.testing.assert_index_equal(
698
699
  df2.columns_value.to_pandas(), df1.columns_value.to_pandas()
699
700
  )
700
- assert isinstance(df2.index_value.value, IndexValue.Int64Index)
701
+ assert_mf_index_dtype(df2.index_value.value, np.int64)
701
702
  assert df2.shape == (10, 10)
702
703
 
703
704
 
maxframe/dataframe/core.py CHANGED
@@ -142,6 +142,14 @@ class IndexValue(Serializable):
142
142
  _data = NDArrayField("data")
143
143
  _dtype = DataTypeField("dtype")
144
144
 
145
+ @property
146
+ def dtype(self):
147
+ return getattr(self, "_dtype", None)
148
+
149
+ @property
150
+ def inferred_type(self):
151
+ return "floating" if self.dtype.kind == "f" else "integer"
152
+
145
153
  class RangeIndex(IndexBase):
146
154
  _name = AnyField("name")
147
155
  _slice = SliceField("slice")
@@ -243,6 +251,10 @@ class IndexValue(Serializable):
243
251
  _data = NDArrayField("data")
244
252
  _dtype = DataTypeField("dtype")
245
253
 
254
+ @property
255
+ def dtype(self):
256
+ return getattr(self, "_dtype", None)
257
+
246
258
  @property
247
259
  def inferred_type(self):
248
260
  return "integer"
@@ -254,6 +266,10 @@ class IndexValue(Serializable):
254
266
  _data = NDArrayField("data")
255
267
  _dtype = DataTypeField("dtype")
256
268
 
269
+ @property
270
+ def dtype(self):
271
+ return getattr(self, "_dtype", None)
272
+
257
273
  @property
258
274
  def inferred_type(self):
259
275
  return "integer"
@@ -265,6 +281,10 @@ class IndexValue(Serializable):
265
281
  _data = NDArrayField("data")
266
282
  _dtype = DataTypeField("dtype")
267
283
 
284
+ @property
285
+ def dtype(self):
286
+ return getattr(self, "_dtype", None)
287
+
268
288
  @property
269
289
  def inferred_type(self):
270
290
  return "floating"
@@ -1514,8 +1534,7 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
1514
1534
  refresh_index_value(self)
1515
1535
  refresh_dtypes(self)
1516
1536
 
1517
- def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
1518
- dtypes = table_meta.pd_column_dtypes
1537
+ def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
1519
1538
  self._dtypes = dtypes
1520
1539
  self._columns_value = parse_index(dtypes.index, store_data=True)
1521
1540
  self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
@@ -1523,6 +1542,9 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
1523
1542
  new_shape[-1] = len(dtypes)
1524
1543
  self._shape = tuple(new_shape)
1525
1544
 
1545
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
1546
+ self.refresh_from_dtypes(table_meta.pd_column_dtypes)
1547
+
1526
1548
  @property
1527
1549
  def dtypes(self):
1528
1550
  dt = getattr(self, "_dtypes", None)
@@ -1666,6 +1688,8 @@ class DataFrameData(_BatchedFetcher, BaseDataFrameData):
1666
1688
  raise NotImplementedError
1667
1689
 
1668
1690
  corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
1691
+ if corner_data is None:
1692
+ return
1669
1693
 
1670
1694
  buf = StringIO()
1671
1695
  max_rows = pd.get_option("display.max_rows")