maxframe 0.1.0b5__cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl → 1.0.0__cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (202):
  1. maxframe/_utils.cpython-37m-aarch64-linux-gnu.so +0 -0
  2. maxframe/codegen.py +10 -4
  3. maxframe/config/config.py +68 -10
  4. maxframe/config/validators.py +42 -11
  5. maxframe/conftest.py +58 -14
  6. maxframe/core/__init__.py +2 -16
  7. maxframe/core/entity/__init__.py +1 -12
  8. maxframe/core/entity/executable.py +1 -1
  9. maxframe/core/entity/objects.py +46 -45
  10. maxframe/core/entity/output_types.py +0 -3
  11. maxframe/core/entity/tests/test_objects.py +43 -0
  12. maxframe/core/entity/tileables.py +5 -78
  13. maxframe/core/graph/__init__.py +2 -2
  14. maxframe/core/graph/builder/__init__.py +0 -1
  15. maxframe/core/graph/builder/base.py +5 -4
  16. maxframe/core/graph/builder/tileable.py +4 -4
  17. maxframe/core/graph/builder/utils.py +4 -8
  18. maxframe/core/graph/core.cpython-37m-aarch64-linux-gnu.so +0 -0
  19. maxframe/core/graph/core.pyx +4 -4
  20. maxframe/core/graph/entity.py +9 -33
  21. maxframe/core/operator/__init__.py +2 -9
  22. maxframe/core/operator/base.py +3 -5
  23. maxframe/core/operator/objects.py +0 -9
  24. maxframe/core/operator/utils.py +55 -0
  25. maxframe/dataframe/__init__.py +1 -1
  26. maxframe/dataframe/arithmetic/around.py +5 -17
  27. maxframe/dataframe/arithmetic/core.py +15 -7
  28. maxframe/dataframe/arithmetic/docstring.py +7 -33
  29. maxframe/dataframe/arithmetic/equal.py +4 -2
  30. maxframe/dataframe/arithmetic/greater.py +4 -2
  31. maxframe/dataframe/arithmetic/greater_equal.py +4 -2
  32. maxframe/dataframe/arithmetic/less.py +2 -2
  33. maxframe/dataframe/arithmetic/less_equal.py +4 -2
  34. maxframe/dataframe/arithmetic/not_equal.py +4 -2
  35. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
  36. maxframe/dataframe/core.py +31 -7
  37. maxframe/dataframe/datasource/date_range.py +2 -2
  38. maxframe/dataframe/datasource/read_odps_query.py +117 -23
  39. maxframe/dataframe/datasource/read_odps_table.py +6 -3
  40. maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
  41. maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
  42. maxframe/dataframe/datastore/to_odps.py +28 -0
  43. maxframe/dataframe/extensions/__init__.py +5 -0
  44. maxframe/dataframe/extensions/flatjson.py +131 -0
  45. maxframe/dataframe/extensions/flatmap.py +317 -0
  46. maxframe/dataframe/extensions/reshuffle.py +1 -1
  47. maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
  48. maxframe/dataframe/groupby/core.py +1 -1
  49. maxframe/dataframe/groupby/cum.py +0 -1
  50. maxframe/dataframe/groupby/fill.py +4 -1
  51. maxframe/dataframe/groupby/getitem.py +6 -0
  52. maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
  53. maxframe/dataframe/groupby/transform.py +5 -1
  54. maxframe/dataframe/indexing/align.py +1 -1
  55. maxframe/dataframe/indexing/loc.py +6 -4
  56. maxframe/dataframe/indexing/rename.py +5 -28
  57. maxframe/dataframe/indexing/sample.py +0 -1
  58. maxframe/dataframe/indexing/set_index.py +68 -1
  59. maxframe/dataframe/initializer.py +11 -1
  60. maxframe/dataframe/merge/__init__.py +9 -1
  61. maxframe/dataframe/merge/concat.py +41 -31
  62. maxframe/dataframe/merge/merge.py +237 -3
  63. maxframe/dataframe/merge/tests/test_merge.py +126 -1
  64. maxframe/dataframe/misc/apply.py +5 -10
  65. maxframe/dataframe/misc/case_when.py +1 -1
  66. maxframe/dataframe/misc/describe.py +2 -2
  67. maxframe/dataframe/misc/drop_duplicates.py +8 -8
  68. maxframe/dataframe/misc/eval.py +4 -0
  69. maxframe/dataframe/misc/memory_usage.py +2 -2
  70. maxframe/dataframe/misc/pct_change.py +1 -83
  71. maxframe/dataframe/misc/tests/test_misc.py +33 -2
  72. maxframe/dataframe/misc/transform.py +1 -30
  73. maxframe/dataframe/misc/value_counts.py +4 -17
  74. maxframe/dataframe/missing/dropna.py +1 -1
  75. maxframe/dataframe/missing/fillna.py +5 -5
  76. maxframe/dataframe/operators.py +1 -17
  77. maxframe/dataframe/reduction/core.py +2 -2
  78. maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
  79. maxframe/dataframe/sort/sort_values.py +1 -11
  80. maxframe/dataframe/statistics/corr.py +3 -3
  81. maxframe/dataframe/statistics/quantile.py +13 -19
  82. maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
  83. maxframe/dataframe/tests/test_initializer.py +33 -2
  84. maxframe/dataframe/utils.py +26 -11
  85. maxframe/dataframe/window/expanding.py +5 -3
  86. maxframe/dataframe/window/tests/test_expanding.py +2 -2
  87. maxframe/errors.py +13 -0
  88. maxframe/extension.py +12 -0
  89. maxframe/io/__init__.py +13 -0
  90. maxframe/io/objects/__init__.py +24 -0
  91. maxframe/io/objects/core.py +140 -0
  92. maxframe/io/objects/tensor.py +76 -0
  93. maxframe/io/objects/tests/__init__.py +13 -0
  94. maxframe/io/objects/tests/test_object_io.py +97 -0
  95. maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
  96. maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
  97. maxframe/{odpsio → io/odpsio}/schema.py +38 -16
  98. maxframe/io/odpsio/tableio.py +719 -0
  99. maxframe/io/odpsio/tests/__init__.py +13 -0
  100. maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
  101. maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
  102. maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
  103. maxframe/io/odpsio/volumeio.py +63 -0
  104. maxframe/learn/contrib/__init__.py +3 -1
  105. maxframe/learn/contrib/graph/__init__.py +15 -0
  106. maxframe/learn/contrib/graph/connected_components.py +215 -0
  107. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  108. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  109. maxframe/learn/contrib/llm/__init__.py +16 -0
  110. maxframe/learn/contrib/llm/core.py +54 -0
  111. maxframe/learn/contrib/llm/models/__init__.py +14 -0
  112. maxframe/learn/contrib/llm/models/dashscope.py +73 -0
  113. maxframe/learn/contrib/llm/multi_modal.py +42 -0
  114. maxframe/learn/contrib/llm/text.py +42 -0
  115. maxframe/learn/contrib/xgboost/classifier.py +26 -2
  116. maxframe/learn/contrib/xgboost/core.py +87 -2
  117. maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
  118. maxframe/learn/contrib/xgboost/predict.py +29 -46
  119. maxframe/learn/contrib/xgboost/regressor.py +3 -10
  120. maxframe/learn/contrib/xgboost/train.py +29 -18
  121. maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
  122. maxframe/lib/mmh3.pyi +43 -0
  123. maxframe/lib/sparse/tests/test_sparse.py +15 -15
  124. maxframe/lib/wrapped_pickle.py +2 -1
  125. maxframe/opcodes.py +8 -0
  126. maxframe/protocol.py +154 -27
  127. maxframe/remote/core.py +4 -8
  128. maxframe/serialization/__init__.py +1 -0
  129. maxframe/serialization/core.cpython-37m-aarch64-linux-gnu.so +0 -0
  130. maxframe/serialization/core.pxd +3 -0
  131. maxframe/serialization/core.pyi +3 -0
  132. maxframe/serialization/core.pyx +67 -26
  133. maxframe/serialization/exception.py +1 -1
  134. maxframe/serialization/pandas.py +52 -17
  135. maxframe/serialization/serializables/core.py +180 -15
  136. maxframe/serialization/serializables/field_type.py +4 -1
  137. maxframe/serialization/serializables/tests/test_serializable.py +54 -5
  138. maxframe/serialization/tests/test_serial.py +2 -1
  139. maxframe/session.py +9 -2
  140. maxframe/tensor/__init__.py +81 -2
  141. maxframe/tensor/arithmetic/isclose.py +1 -0
  142. maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
  143. maxframe/tensor/core.py +5 -136
  144. maxframe/tensor/datasource/array.py +3 -0
  145. maxframe/tensor/datasource/full.py +1 -1
  146. maxframe/tensor/datasource/tests/test_datasource.py +1 -1
  147. maxframe/tensor/indexing/flatnonzero.py +1 -1
  148. maxframe/tensor/indexing/getitem.py +2 -0
  149. maxframe/tensor/merge/__init__.py +2 -0
  150. maxframe/tensor/merge/concatenate.py +101 -0
  151. maxframe/tensor/merge/tests/test_merge.py +30 -1
  152. maxframe/tensor/merge/vstack.py +74 -0
  153. maxframe/tensor/{base → misc}/__init__.py +2 -0
  154. maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
  155. maxframe/tensor/misc/atleast_2d.py +70 -0
  156. maxframe/tensor/misc/atleast_3d.py +85 -0
  157. maxframe/tensor/misc/tests/__init__.py +13 -0
  158. maxframe/tensor/{base → misc}/transpose.py +22 -18
  159. maxframe/tensor/{base → misc}/unique.py +3 -3
  160. maxframe/tensor/operators.py +1 -7
  161. maxframe/tensor/random/core.py +1 -1
  162. maxframe/tensor/reduction/count_nonzero.py +2 -1
  163. maxframe/tensor/reduction/mean.py +1 -0
  164. maxframe/tensor/reduction/nanmean.py +1 -0
  165. maxframe/tensor/reduction/nanvar.py +2 -0
  166. maxframe/tensor/reduction/tests/test_reduction.py +12 -1
  167. maxframe/tensor/reduction/var.py +2 -0
  168. maxframe/tensor/statistics/quantile.py +2 -2
  169. maxframe/tensor/utils.py +2 -22
  170. maxframe/tests/test_protocol.py +34 -0
  171. maxframe/tests/test_utils.py +0 -12
  172. maxframe/tests/utils.py +17 -2
  173. maxframe/typing_.py +4 -1
  174. maxframe/udf.py +8 -9
  175. maxframe/utils.py +106 -86
  176. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +3 -3
  177. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +579 -555
  178. maxframe_client/__init__.py +0 -1
  179. maxframe_client/clients/framedriver.py +4 -1
  180. maxframe_client/fetcher.py +81 -74
  181. maxframe_client/session/consts.py +3 -0
  182. maxframe_client/session/graph.py +8 -2
  183. maxframe_client/session/odps.py +194 -40
  184. maxframe_client/session/task.py +94 -39
  185. maxframe_client/tests/test_fetcher.py +21 -3
  186. maxframe_client/tests/test_session.py +109 -8
  187. maxframe/core/entity/chunks.py +0 -68
  188. maxframe/core/entity/fuse.py +0 -73
  189. maxframe/core/graph/builder/chunk.py +0 -430
  190. maxframe/odpsio/tableio.py +0 -322
  191. maxframe/odpsio/volumeio.py +0 -95
  192. maxframe_client/clients/spe.py +0 -104
  193. /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
  194. /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
  195. /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
  196. /maxframe/tensor/{base → misc}/astype.py +0 -0
  197. /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
  198. /maxframe/tensor/{base → misc}/ravel.py +0 -0
  199. /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
  200. /maxframe/tensor/{base → misc}/where.py +0 -0
  201. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +0 -0
  202. {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe/codegen.py CHANGED
@@ -26,9 +26,9 @@ from odps.utils import camel_to_underline
26
26
  from .core import OperatorType, Tileable, TileableGraph
27
27
  from .core.operator import Fetch
28
28
  from .extension import iter_extensions
29
+ from .io.odpsio import build_dataframe_table_meta
30
+ from .io.odpsio.schema import pandas_to_odps_schema
29
31
  from .lib import wrapped_pickle as pickle
30
- from .odpsio import build_dataframe_table_meta
31
- from .odpsio.schema import pandas_to_odps_schema
32
32
  from .protocol import DataFrameTableMeta, ResultInfo
33
33
  from .serialization import PickleContainer
34
34
  from .serialization.serializables import Serializable, StringField
@@ -86,6 +86,8 @@ class AbstractUDF(Serializable):
86
86
 
87
87
 
88
88
  class UserCodeMixin:
89
+ __slots__ = ()
90
+
89
91
  @classmethod
90
92
  def obj_to_python_expr(cls, obj: Any = None) -> str:
91
93
  """
@@ -344,6 +346,9 @@ def register_engine_codegen(type_: Type["BigDagCodeGenerator"]):
344
346
  BUILTIN_ENGINE_SPE = "SPE"
345
347
  BUILTIN_ENGINE_MCSQL = "MCSQL"
346
348
 
349
+ FAST_RANGE_INDEX_ENABLED = "codegen.fast_range_index_enabled"
350
+ ROW_NUMBER_WINDOW_INDEX_ENABLED = "codegen.row_number_window_index_enabled"
351
+
347
352
 
348
353
  class BigDagCodeGenerator(metaclass=abc.ABCMeta):
349
354
  _context: BigDagCodeContext
@@ -502,6 +507,7 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
502
507
  prefer_binary=pack.prefer_binary,
503
508
  pre_release=pack.pre_release,
504
509
  force_rebuild=pack.force_rebuild,
510
+ no_audit_wheel=pack.no_audit_wheel,
505
511
  python_tag=python_tag,
506
512
  is_production=is_production,
507
513
  schedule_id=schedule_id,
@@ -516,12 +522,12 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
516
522
 
517
523
  def register_udfs(self, odps_ctx: "ODPSSessionContext"):
518
524
  for udf in self._context.get_udfs():
519
- logger.info("[Session %s] Registering UDF %s", self._session_id, udf.name)
525
+ logger.info("[Session=%s] Registering UDF %s", self._session_id, udf.name)
520
526
  udf.register(odps_ctx, True)
521
527
 
522
528
  def unregister_udfs(self, odps_ctx: "ODPSSessionContext"):
523
529
  for udf in self._context.get_udfs():
524
- logger.info("[Session %s] Unregistering UDF %s", self._session_id, udf.name)
530
+ logger.info("[Session=%s] Unregistering UDF %s", self._session_id, udf.name)
525
531
  udf.unregister(odps_ctx)
526
532
 
527
533
  def get_udfs(self) -> List[AbstractUDF]:
maxframe/config/config.py CHANGED
@@ -19,28 +19,40 @@ import warnings
19
19
  from copy import deepcopy
20
20
  from typing import Any, Dict, Optional, Union
21
21
 
22
+ from odps.lib import tzlocal
23
+
24
+ try:
25
+ from zoneinfo import available_timezones
26
+ except ImportError:
27
+ from pytz import all_timezones
28
+
29
+ available_timezones = lambda: all_timezones
30
+
22
31
  from ..utils import get_python_tag
23
32
  from .validators import (
24
33
  ValidatorType,
25
34
  all_validator,
26
- any_validator,
27
35
  is_bool,
28
36
  is_dict,
29
37
  is_in,
30
38
  is_integer,
39
+ is_non_negative_integer,
31
40
  is_null,
32
41
  is_numeric,
33
42
  is_string,
43
+ is_valid_cache_path,
34
44
  )
35
45
 
36
46
  _DEFAULT_REDIRECT_WARN = "Option {source} has been replaced by {target} and might be removed in a future release."
37
47
  _DEFAULT_MAX_ALIVE_SECONDS = 3 * 24 * 3600
38
48
  _DEFAULT_MAX_IDLE_SECONDS = 3600
39
49
  _DEFAULT_SPE_OPERATION_TIMEOUT_SECONDS = 120
50
+ _DEFAULT_SPE_FAILURE_RETRY_TIMES = 5
40
51
  _DEFAULT_UPLOAD_BATCH_SIZE = 4096
41
52
  _DEFAULT_TEMP_LIFECYCLE = 1
42
53
  _DEFAULT_TASK_START_TIMEOUT = 60
43
- _DEFAULT_LOGVIEW_HOURS = 24 * 60
54
+ _DEFAULT_TASK_RESTART_TIMEOUT = 300
55
+ _DEFAULT_LOGVIEW_HOURS = 24 * 30
44
56
 
45
57
 
46
58
  class OptionError(Exception):
@@ -296,28 +308,60 @@ class Config:
296
308
  return {k: v for k, v in res.items() if k in self._remote_options}
297
309
 
298
310
 
311
+ def _get_legal_local_tz_name() -> Optional[str]:
312
+ """Sometimes we may get illegal tz name from tzlocal.get_localzone()"""
313
+ tz_name = str(tzlocal.get_localzone())
314
+ if tz_name not in available_timezones():
315
+ return None
316
+ return tz_name
317
+
318
+
299
319
  default_options = Config()
300
320
  default_options.register_option(
301
321
  "execution_mode", "trigger", validator=is_in(["trigger", "eager"])
302
322
  )
323
+ default_options.register_option("use_common_table", False, validator=is_bool)
303
324
  default_options.register_option(
304
325
  "python_tag", get_python_tag(), validator=is_string, remote=True
305
326
  )
327
+ default_options.register_option(
328
+ "local_timezone",
329
+ _get_legal_local_tz_name(),
330
+ validator=is_null | is_in(set(available_timezones())),
331
+ remote=True,
332
+ )
306
333
  default_options.register_option(
307
334
  "session.logview_hours", _DEFAULT_LOGVIEW_HOURS, validator=is_integer, remote=True
308
335
  )
309
336
  default_options.register_option(
310
337
  "client.task_start_timeout", _DEFAULT_TASK_START_TIMEOUT, validator=is_integer
311
338
  )
339
+ default_options.register_option(
340
+ "client.task_restart_timeout", _DEFAULT_TASK_RESTART_TIMEOUT, validator=is_integer
341
+ )
312
342
  default_options.register_option("sql.enable_mcqa", True, validator=is_bool, remote=True)
313
343
  default_options.register_option(
314
344
  "sql.generate_comments", True, validator=is_bool, remote=True
315
345
  )
346
+ default_options.register_option(
347
+ "sql.auto_use_common_image", True, validator=is_bool, remote=True
348
+ )
316
349
  default_options.register_option("sql.settings", {}, validator=is_dict, remote=True)
317
350
 
318
351
  default_options.register_option("is_production", False, validator=is_bool, remote=True)
319
352
  default_options.register_option("schedule_id", "", validator=is_string, remote=True)
320
353
 
354
+ default_options.register_option(
355
+ "service_role_arn", None, validator=is_null | is_string, remote=True
356
+ )
357
+ default_options.register_option(
358
+ "object_cache_url", None, validator=is_null | is_valid_cache_path, remote=True
359
+ )
360
+
361
+ default_options.register_option(
362
+ "chunk_size", None, validator=is_null | is_integer, remote=True
363
+ )
364
+
321
365
  default_options.register_option(
322
366
  "session.max_alive_seconds",
323
367
  _DEFAULT_MAX_ALIVE_SECONDS,
@@ -330,15 +374,25 @@ default_options.register_option(
330
374
  validator=is_numeric,
331
375
  remote=True,
332
376
  )
377
+ default_options.register_option(
378
+ "session.quota_name", None, validator=is_null | is_string, remote=True
379
+ )
380
+ default_options.register_option(
381
+ "session.enable_schema", None, validator=is_null | is_bool, remote=True
382
+ )
383
+ default_options.register_option(
384
+ "session.enable_high_availability", None, validator=is_null | is_bool, remote=True
385
+ )
386
+ default_options.register_option(
387
+ "session.default_schema", None, validator=is_null | is_string, remote=True
388
+ )
333
389
  default_options.register_option(
334
390
  "session.upload_batch_size",
335
391
  _DEFAULT_UPLOAD_BATCH_SIZE,
336
392
  validator=is_integer,
337
393
  )
338
394
  default_options.register_option(
339
- "session.table_lifecycle",
340
- None,
341
- validator=any_validator(is_null, is_integer),
395
+ "session.table_lifecycle", None, validator=is_null | is_integer, remote=True
342
396
  )
343
397
  default_options.register_option(
344
398
  "session.temp_table_lifecycle",
@@ -349,7 +403,7 @@ default_options.register_option(
349
403
  default_options.register_option(
350
404
  "session.subinstance_priority",
351
405
  None,
352
- validator=any_validator(is_null, is_integer),
406
+ validator=is_null | is_integer,
353
407
  remote=True,
354
408
  )
355
409
 
@@ -361,9 +415,7 @@ default_options.register_option(
361
415
  default_options.register_option(
362
416
  "optimize.head_optimize_threshold", 1000, validator=is_integer
363
417
  )
364
- default_options.register_option(
365
- "show_progress", "auto", validator=any_validator(is_bool, is_string)
366
- )
418
+ default_options.register_option("show_progress", "auto", validator=is_bool | is_string)
367
419
  default_options.register_option(
368
420
  "dag.settings", value=dict(), validator=is_dict, remote=True
369
421
  )
@@ -374,7 +426,13 @@ default_options.register_option(
374
426
  default_options.register_option(
375
427
  "spe.operation_timeout_seconds",
376
428
  _DEFAULT_SPE_OPERATION_TIMEOUT_SECONDS,
377
- validator=is_integer,
429
+ validator=is_non_negative_integer,
430
+ remote=True,
431
+ )
432
+ default_options.register_option(
433
+ "spe.failure_retry_times",
434
+ _DEFAULT_SPE_FAILURE_RETRY_TIMES,
435
+ validator=is_non_negative_integer,
378
436
  remote=True,
379
437
  )
380
438
 
maxframe/config/validators.py CHANGED
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from typing import Callable
16
+ from urllib.parse import urlparse
16
17
 
17
18
  ValidatorType = Callable[..., bool]
18
19
 
@@ -32,21 +33,51 @@ def all_validator(*validators: ValidatorType):
32
33
  return validate
33
34
 
34
35
 
35
- is_null = lambda x: x is None
36
- is_bool = lambda x: isinstance(x, bool)
37
- is_float = lambda x: isinstance(x, float)
38
- is_integer = lambda x: isinstance(x, int)
39
- is_numeric = lambda x: isinstance(x, (int, float))
40
- is_string = lambda x: isinstance(x, str)
41
- is_dict = lambda x: isinstance(x, dict)
42
- is_positive_integer = lambda x: is_integer(x) and x > 0
36
+ class Validator:
37
+ def __init__(self, func: ValidatorType):
38
+ self._func = func
39
+
40
+ def __call__(self, arg) -> bool:
41
+ return self._func(arg)
42
+
43
+ def __or__(self, other):
44
+ return OrValidator(self, other)
45
+
46
+
47
+ class OrValidator(Validator):
48
+ def __init__(self, lhs: Validator, rhs: Validator):
49
+ super().__init__(lambda x: lhs(x) or rhs(x))
50
+
51
+
52
+ is_null = Validator(lambda x: x is None)
53
+ is_bool = Validator(lambda x: isinstance(x, bool))
54
+ is_float = Validator(lambda x: isinstance(x, float))
55
+ is_integer = Validator(lambda x: isinstance(x, int))
56
+ is_numeric = Validator(lambda x: isinstance(x, (int, float)))
57
+ is_string = Validator(lambda x: isinstance(x, str))
58
+ is_dict = Validator(lambda x: isinstance(x, dict))
59
+ is_positive_integer = Validator(lambda x: is_integer(x) and x > 0)
60
+ is_non_negative_integer = Validator(lambda x: is_integer(x) and x >= 0)
43
61
 
44
62
 
45
63
  def is_in(vals):
46
- def validate(x):
47
- return x in vals
64
+ return Validator(vals.__contains__)
48
65
 
49
- return validate
66
+
67
+ def _is_valid_cache_path(path: str) -> bool:
68
+ """
69
+ path should look like oss://oss_endpoint/oss_bucket/path
70
+ """
71
+ parsed_url = urlparse(path)
72
+ return (
73
+ parsed_url.scheme == "oss"
74
+ and parsed_url.netloc
75
+ and parsed_url.path
76
+ and "/" in parsed_url.path
77
+ )
78
+
79
+
80
+ is_valid_cache_path = Validator(_is_valid_cache_path)
50
81
 
51
82
 
52
83
  _invalid_char_in_yaml_str = {'"', "'", "\n", "\\"}
maxframe/conftest.py CHANGED
@@ -14,10 +14,13 @@
14
14
 
15
15
  import faulthandler
16
16
  import os
17
- from configparser import ConfigParser, NoOptionError
17
+ from configparser import ConfigParser, NoOptionError, NoSectionError
18
18
 
19
19
  import pytest
20
20
  from odps import ODPS
21
+ from odps.accounts import BearerTokenAccount
22
+
23
+ from .config import options
21
24
 
22
25
  faulthandler.enable(all_threads=True)
23
26
  _test_conf_file_name = os.path.join(
@@ -32,12 +35,23 @@ def test_config():
32
35
  return config
33
36
 
34
37
 
35
- @pytest.fixture(scope="session", autouse=True)
36
- def odps_envs(test_config):
37
- access_id = test_config.get("odps", "access_id")
38
- secret_access_key = test_config.get("odps", "secret_access_key")
39
- project = test_config.get("odps", "project")
40
- endpoint = test_config.get("odps", "endpoint")
38
+ def _get_odps_env(test_config: ConfigParser, section_name: str) -> ODPS:
39
+ try:
40
+ access_id = test_config.get(section_name, "access_id")
41
+ except NoOptionError:
42
+ access_id = test_config.get("odps", "access_id")
43
+ try:
44
+ secret_access_key = test_config.get(section_name, "secret_access_key")
45
+ except NoOptionError:
46
+ secret_access_key = test_config.get("odps", "secret_access_key")
47
+ try:
48
+ project = test_config.get(section_name, "project")
49
+ except NoOptionError:
50
+ project = test_config.get("odps", "project")
51
+ try:
52
+ endpoint = test_config.get(section_name, "endpoint")
53
+ except NoOptionError:
54
+ endpoint = test_config.get("odps", "endpoint")
41
55
  try:
42
56
  tunnel_endpoint = test_config.get("odps", "tunnel_endpoint")
43
57
  except NoOptionError:
@@ -53,12 +67,31 @@ def odps_envs(test_config):
53
67
  ],
54
68
  }
55
69
  token = entry.get_project().generate_auth_token(policy, "bearer", 5)
70
+ return ODPS(
71
+ account=BearerTokenAccount(token, 5),
72
+ project=project,
73
+ endpoint=endpoint,
74
+ tunnel_endpoint=tunnel_endpoint,
75
+ )
56
76
 
57
- os.environ["ODPS_BEARER_TOKEN"] = token
58
- os.environ["ODPS_PROJECT_NAME"] = project
59
- os.environ["ODPS_ENDPOINT"] = endpoint
60
- if tunnel_endpoint:
61
- os.environ["ODPS_TUNNEL_ENDPOINT"] = tunnel_endpoint
77
+
78
+ @pytest.fixture(scope="session")
79
+ def odps_with_schema(test_config):
80
+ try:
81
+ return _get_odps_env(test_config, "odps_with_schema")
82
+ except NoSectionError:
83
+ pytest.skip("Need to specify odps_with_schema section in test.conf")
84
+
85
+
86
+ @pytest.fixture(scope="session", autouse=True)
87
+ def odps_envs(test_config):
88
+ entry = _get_odps_env(test_config, "odps")
89
+
90
+ os.environ["ODPS_BEARER_TOKEN"] = entry.account.token
91
+ os.environ["ODPS_PROJECT_NAME"] = entry.project
92
+ os.environ["ODPS_ENDPOINT"] = entry.endpoint
93
+ if entry.tunnel_endpoint:
94
+ os.environ["ODPS_TUNNEL_ENDPOINT"] = entry.tunnel_endpoint
62
95
 
63
96
  try:
64
97
  yield
@@ -77,16 +110,23 @@ def odps_envs(test_config):
77
110
  pass
78
111
 
79
112
 
80
- @pytest.fixture
113
+ @pytest.fixture(scope="session")
81
114
  def oss_config():
82
115
  config = ConfigParser()
83
116
  config.read(_test_conf_file_name)
84
117
 
118
+ old_role_arn = options.service_role_arn
119
+ old_cache_url = options.object_cache_url
120
+
85
121
  try:
86
122
  oss_access_id = config.get("oss", "access_id")
87
123
  oss_secret_access_key = config.get("oss", "secret_access_key")
88
124
  oss_bucket_name = config.get("oss", "bucket_name")
89
125
  oss_endpoint = config.get("oss", "endpoint")
126
+ oss_rolearn = config.get("oss", "rolearn")
127
+
128
+ options.service_role_arn = oss_rolearn
129
+ options.object_cache_url = f"oss://{oss_endpoint}/{oss_bucket_name}"
90
130
 
91
131
  config.oss_config = (
92
132
  oss_access_id,
@@ -99,9 +139,13 @@ def oss_config():
99
139
 
100
140
  auth = oss2.Auth(oss_access_id, oss_secret_access_key)
101
141
  config.oss_bucket = oss2.Bucket(auth, oss_endpoint, oss_bucket_name)
102
- return config
142
+ config.oss_rolearn = oss_rolearn
143
+ yield config
103
144
  except (ConfigParser.NoSectionError, ConfigParser.NoOptionError, ImportError):
104
145
  return None
146
+ finally:
147
+ options.service_role_arn = old_role_arn
148
+ options.object_cache_url = old_cache_url
105
149
 
106
150
 
107
151
  @pytest.fixture(autouse=True)
maxframe/core/__init__.py CHANGED
@@ -14,27 +14,18 @@
14
14
 
15
15
  # noinspection PyUnresolvedReferences
16
16
  from ..typing_ import ChunkType, EntityType, OperatorType, TileableType
17
- from .base import ExecutionError
17
+ from .base import Base, ExecutionError
18
18
  from .entity import (
19
- CHUNK_TYPE,
20
19
  ENTITY_TYPE,
21
- FUSE_CHUNK_TYPE,
22
- OBJECT_CHUNK_TYPE,
23
20
  OBJECT_TYPE,
24
21
  TILEABLE_TYPE,
25
- Chunk,
26
- ChunkData,
27
22
  Entity,
28
23
  EntityData,
29
24
  ExecutableTuple,
30
- FuseChunk,
31
- FuseChunkData,
32
25
  HasShapeTileable,
33
26
  HasShapeTileableData,
34
27
  NotSupportTile,
35
28
  Object,
36
- ObjectChunk,
37
- ObjectChunkData,
38
29
  ObjectData,
39
30
  OutputType,
40
31
  Tileable,
@@ -43,23 +34,18 @@ from .entity import (
43
34
  get_fetch_class,
44
35
  get_output_types,
45
36
  get_tileable_types,
46
- register,
47
37
  register_fetch_class,
48
38
  register_output_types,
49
- unregister,
50
39
  )
51
40
 
52
41
  # noinspection PyUnresolvedReferences
53
42
  from .graph import (
54
43
  DAG,
55
- ChunkGraph,
56
- ChunkGraphBuilder,
57
44
  DirectedGraph,
58
45
  GraphContainsCycleError,
59
46
  GraphSerializer,
60
47
  TileableGraph,
61
48
  TileableGraphBuilder,
62
- TileContext,
63
- TileStatus,
64
49
  )
65
50
  from .mode import enter_mode, is_build_mode, is_eager_mode, is_kernel_mode
51
+ from .operator import build_fetch
maxframe/core/entity/__init__.py CHANGED
@@ -12,18 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from .chunks import CHUNK_TYPE, Chunk, ChunkData
16
15
  from .core import ENTITY_TYPE, Entity, EntityData
17
16
  from .executable import ExecutableTuple, _ExecuteAndFetchMixin
18
- from .fuse import FUSE_CHUNK_TYPE, FuseChunk, FuseChunkData
19
- from .objects import (
20
- OBJECT_CHUNK_TYPE,
21
- OBJECT_TYPE,
22
- Object,
23
- ObjectChunk,
24
- ObjectChunkData,
25
- ObjectData,
26
- )
17
+ from .objects import OBJECT_TYPE, Object, ObjectData
27
18
  from .output_types import (
28
19
  OutputType,
29
20
  get_fetch_class,
@@ -39,6 +30,4 @@ from .tileables import (
39
30
  NotSupportTile,
40
31
  Tileable,
41
32
  TileableData,
42
- register,
43
- unregister,
44
33
  )
maxframe/core/entity/executable.py CHANGED
@@ -46,7 +46,7 @@ class DecrefRunner:
46
46
  break
47
47
 
48
48
  session = session_ref()
49
- if session is None:
49
+ if session is None or session.closed:
50
50
  fut.set_result(None)
51
51
  continue
52
52
  try:
maxframe/core/entity/objects.py CHANGED
@@ -12,63 +12,57 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Dict
15
+ from typing import Any, Dict, Type
16
16
 
17
- from ...serialization.serializables import FieldTypes, ListField
18
- from ...utils import skip_na_call
19
- from .chunks import Chunk, ChunkData
17
+ from ...serialization import load_type
18
+ from ...serialization.serializables import StringField
20
19
  from .core import Entity
21
20
  from .executable import _ToObjectMixin
22
21
  from .tileables import TileableData
23
22
 
24
23
 
25
- class ObjectChunkData(ChunkData):
26
- # chunk whose data could be any serializable
24
+ class ObjectData(TileableData, _ToObjectMixin):
27
25
  __slots__ = ()
28
26
  type_name = "Object"
27
+ # workaround for removed field since v0.1.0b5
28
+ # todo remove this when all versions below v1.0.0rc1 is eliminated
29
+ _legacy_deprecated_non_primitives = ["_chunks"]
30
+ _legacy_new_non_primitives = ["object_class"]
29
31
 
30
- def __init__(self, op=None, index=None, **kw):
31
- super().__init__(_op=op, _index=index, **kw)
32
-
33
- @property
34
- def params(self) -> Dict[str, Any]:
35
- # params return the properties which useful to rebuild a new chunk
36
- return {
37
- "index": self.index,
38
- }
39
-
40
- @params.setter
41
- def params(self, new_params: Dict[str, Any]):
42
- params = new_params.copy()
43
- params.pop("index", None) # index not needed to update
44
- if params: # pragma: no cover
45
- raise TypeError(f"Unknown params: {list(params)}")
32
+ object_class = StringField("object_class", default=None)
46
33
 
47
34
  @classmethod
48
- def get_params_from_data(cls, data: Any) -> Dict[str, Any]:
49
- return dict()
50
-
51
-
52
- class ObjectChunk(Chunk):
53
- __slots__ = ()
54
- _allow_data_type_ = (ObjectChunkData,)
55
- type_name = "Object"
56
-
57
-
58
- class ObjectData(TileableData, _ToObjectMixin):
59
- __slots__ = ()
60
- type_name = "Object"
61
-
62
- # optional fields
63
- _chunks = ListField(
64
- "chunks",
65
- FieldTypes.reference(ObjectChunkData),
66
- on_serialize=skip_na_call(lambda x: [it.data for it in x]),
67
- on_deserialize=skip_na_call(lambda x: [ObjectChunk(it) for it in x]),
68
- )
35
+ def get_entity_class(cls) -> Type["Object"]:
36
+ if getattr(cls, "_entity_class", None) is not None:
37
+ return cls._entity_class
38
+ assert cls.__qualname__[-4:] == "Data"
39
+ target_class_name = cls.__module__ + "#" + cls.__qualname__[:-4]
40
+ cls._entity_class = load_type(target_class_name, Object)
41
+ return cls._entity_class
42
+
43
+ def __new__(cls, op=None, nsplits=None, **kw):
44
+ if cls is ObjectData:
45
+ obj_cls = kw.get("object_class")
46
+ if isinstance(obj_cls, str):
47
+ obj_cls = load_type(obj_cls, (Object, ObjectData))
48
+ if isinstance(obj_cls, type) and issubclass(obj_cls, Object):
49
+ obj_cls = obj_cls.get_data_class()
50
+
51
+ if obj_cls is not None and cls is not obj_cls:
52
+ return obj_cls(op=op, nsplits=nsplits, **kw)
53
+ return super().__new__(cls)
69
54
 
70
55
  def __init__(self, op=None, nsplits=None, **kw):
56
+ obj_cls = kw.pop("object_class", None)
57
+ if isinstance(obj_cls, type):
58
+ if isinstance(obj_cls, type) and issubclass(obj_cls, Object):
59
+ obj_cls = obj_cls.get_data_class()
60
+ kw["object_class"] = obj_cls.__module__ + "#" + obj_cls.__qualname__
61
+
71
62
  super().__init__(_op=op, _nsplits=nsplits, **kw)
63
+ if self.object_class is None and type(self) is not ObjectData:
64
+ cls = type(self)
65
+ self.object_class = cls.__module__ + "#" + cls.__qualname__
72
66
 
73
67
  def __repr__(self):
74
68
  return f"Object <op={type(self.op).__name__}, key={self.key}>"
@@ -76,7 +70,7 @@ class ObjectData(TileableData, _ToObjectMixin):
76
70
  @property
77
71
  def params(self):
78
72
  # params return the properties which useful to rebuild a new tileable object
79
- return dict()
73
+ return dict(object_class=self.object_class)
80
74
 
81
75
  @params.setter
82
76
  def params(self, new_params: Dict[str, Any]):
@@ -95,6 +89,13 @@ class Object(Entity, _ToObjectMixin):
95
89
  _allow_data_type_ = (ObjectData,)
96
90
  type_name = "Object"
97
91
 
92
+ @classmethod
93
+ def get_data_class(cls) -> Type[ObjectData]:
94
+ if getattr(cls, "_data_class", None) is not None:
95
+ return cls._data_class
96
+ target_class_name = cls.__module__ + "#" + cls.__qualname__ + "Data"
97
+ cls._data_class = load_type(target_class_name, ObjectData)
98
+ return cls._data_class
99
+
98
100
 
99
101
  OBJECT_TYPE = (Object, ObjectData)
100
- OBJECT_CHUNK_TYPE = (ObjectChunk, ObjectChunkData)
maxframe/core/entity/output_types.py CHANGED
@@ -15,7 +15,6 @@
15
15
  import functools
16
16
  from enum import Enum
17
17
 
18
- from .fuse import FUSE_CHUNK_TYPE
19
18
  from .objects import OBJECT_TYPE
20
19
 
21
20
 
@@ -77,8 +76,6 @@ def get_output_types(*objs, unknown_as=None):
77
76
  for obj in objs:
78
77
  if obj is None:
79
78
  continue
80
- elif isinstance(obj, FUSE_CHUNK_TYPE):
81
- obj = obj.chunk
82
79
 
83
80
  try:
84
81
  output_types.append(_get_output_type_by_cls(type(obj)))