maxframe 0.1.0b4__cp311-cp311-win_amd64.whl → 1.0.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/__init__.py +1 -0
- maxframe/_utils.cp311-win_amd64.pyd +0 -0
- maxframe/codegen.py +56 -5
- maxframe/config/config.py +78 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp311-win_amd64.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +2 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +58 -12
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +120 -24
- maxframe/dataframe/datasource/read_odps_table.py +9 -4
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/__init__.py +4 -0
- maxframe/dataframe/misc/apply.py +6 -11
- maxframe/dataframe/misc/case_when.py +141 -0
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/pivot_table.py +262 -0
- maxframe/dataframe/misc/tests/test_misc.py +93 -1
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/plotting/core.py +2 -2
- maxframe/dataframe/reduction/core.py +4 -3
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +33 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +43 -12
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +75 -33
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/utils.py +52 -0
- maxframe/learn/contrib/xgboost/__init__.py +26 -0
- maxframe/learn/contrib/xgboost/classifier.py +110 -0
- maxframe/learn/contrib/xgboost/core.py +241 -0
- maxframe/learn/contrib/xgboost/dmatrix.py +147 -0
- maxframe/learn/contrib/xgboost/predict.py +121 -0
- maxframe/learn/contrib/xgboost/regressor.py +71 -0
- maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
- maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
- maxframe/learn/contrib/xgboost/train.py +132 -0
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/learn/utils/__init__.py +15 -0
- maxframe/learn/utils/core.py +29 -0
- maxframe/lib/mmh3.cp311-win_amd64.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +11 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp311-win_amd64.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +64 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +37 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +7 -2
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/scalar.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +4 -0
- maxframe/tensor/misc/atleast_1d.py +72 -0
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/misc/unique.py +205 -0
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +62 -3
- maxframe/utils.py +112 -86
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/RECORD +208 -167
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +123 -54
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +223 -40
- maxframe_client/session/task.py +108 -80
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +136 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -300
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b4.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe/__init__.py
CHANGED
|
Binary file
|
maxframe/codegen.py
CHANGED
|
@@ -16,6 +16,7 @@ import abc
|
|
|
16
16
|
import base64
|
|
17
17
|
import dataclasses
|
|
18
18
|
import logging
|
|
19
|
+
from collections import defaultdict
|
|
19
20
|
from enum import Enum
|
|
20
21
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, Union
|
|
21
22
|
|
|
@@ -25,14 +26,14 @@ from odps.utils import camel_to_underline
|
|
|
25
26
|
from .core import OperatorType, Tileable, TileableGraph
|
|
26
27
|
from .core.operator import Fetch
|
|
27
28
|
from .extension import iter_extensions
|
|
29
|
+
from .io.odpsio import build_dataframe_table_meta
|
|
30
|
+
from .io.odpsio.schema import pandas_to_odps_schema
|
|
28
31
|
from .lib import wrapped_pickle as pickle
|
|
29
|
-
from .odpsio import build_dataframe_table_meta
|
|
30
|
-
from .odpsio.schema import pandas_to_odps_schema
|
|
31
32
|
from .protocol import DataFrameTableMeta, ResultInfo
|
|
32
33
|
from .serialization import PickleContainer
|
|
33
34
|
from .serialization.serializables import Serializable, StringField
|
|
34
35
|
from .typing_ import PandasObjectTypes
|
|
35
|
-
from .udf import MarkedFunction
|
|
36
|
+
from .udf import MarkedFunction, PythonPackOptions
|
|
36
37
|
|
|
37
38
|
if TYPE_CHECKING:
|
|
38
39
|
from odpsctx import ODPSSessionContext
|
|
@@ -75,8 +76,18 @@ class AbstractUDF(Serializable):
|
|
|
75
76
|
def unregister(self, odps: "ODPSSessionContext"):
|
|
76
77
|
raise NotImplementedError
|
|
77
78
|
|
|
79
|
+
@abc.abstractmethod
|
|
80
|
+
def collect_pythonpack(self) -> List[PythonPackOptions]:
|
|
81
|
+
raise NotImplementedError
|
|
82
|
+
|
|
83
|
+
@abc.abstractmethod
|
|
84
|
+
def load_pythonpack_resources(self, odps_ctx: "ODPSSessionContext") -> None:
|
|
85
|
+
raise NotImplementedError
|
|
86
|
+
|
|
78
87
|
|
|
79
88
|
class UserCodeMixin:
|
|
89
|
+
__slots__ = ()
|
|
90
|
+
|
|
80
91
|
@classmethod
|
|
81
92
|
def obj_to_python_expr(cls, obj: Any = None) -> str:
|
|
82
93
|
"""
|
|
@@ -335,6 +346,9 @@ def register_engine_codegen(type_: Type["BigDagCodeGenerator"]):
|
|
|
335
346
|
BUILTIN_ENGINE_SPE = "SPE"
|
|
336
347
|
BUILTIN_ENGINE_MCSQL = "MCSQL"
|
|
337
348
|
|
|
349
|
+
FAST_RANGE_INDEX_ENABLED = "codegen.fast_range_index_enabled"
|
|
350
|
+
ROW_NUMBER_WINDOW_INDEX_ENABLED = "codegen.row_number_window_index_enabled"
|
|
351
|
+
|
|
338
352
|
|
|
339
353
|
class BigDagCodeGenerator(metaclass=abc.ABCMeta):
|
|
340
354
|
_context: BigDagCodeContext
|
|
@@ -469,14 +483,51 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
|
|
|
469
483
|
output_key_to_result_infos=self._context.get_tileable_result_infos(),
|
|
470
484
|
)
|
|
471
485
|
|
|
486
|
+
def run_pythonpacks(
|
|
487
|
+
self,
|
|
488
|
+
odps_ctx: "ODPSSessionContext",
|
|
489
|
+
python_tag: str,
|
|
490
|
+
is_production: bool = False,
|
|
491
|
+
schedule_id: Optional[str] = None,
|
|
492
|
+
hints: Optional[dict] = None,
|
|
493
|
+
priority: Optional[int] = None,
|
|
494
|
+
) -> Dict[str, PythonPackOptions]:
|
|
495
|
+
key_to_packs = defaultdict(list)
|
|
496
|
+
for udf in self._context.get_udfs():
|
|
497
|
+
for pack in udf.collect_pythonpack():
|
|
498
|
+
key_to_packs[pack.key].append(pack)
|
|
499
|
+
distinct_packs = []
|
|
500
|
+
for packs in key_to_packs.values():
|
|
501
|
+
distinct_packs.append(packs[0])
|
|
502
|
+
|
|
503
|
+
inst_id_to_req = {}
|
|
504
|
+
for pack in distinct_packs:
|
|
505
|
+
inst = odps_ctx.run_pythonpack(
|
|
506
|
+
requirements=pack.requirements,
|
|
507
|
+
prefer_binary=pack.prefer_binary,
|
|
508
|
+
pre_release=pack.pre_release,
|
|
509
|
+
force_rebuild=pack.force_rebuild,
|
|
510
|
+
no_audit_wheel=pack.no_audit_wheel,
|
|
511
|
+
python_tag=python_tag,
|
|
512
|
+
is_production=is_production,
|
|
513
|
+
schedule_id=schedule_id,
|
|
514
|
+
hints=hints,
|
|
515
|
+
priority=priority,
|
|
516
|
+
)
|
|
517
|
+
# fulfill instance id of pythonpacks with same keys
|
|
518
|
+
for same_pack in key_to_packs[pack.key]:
|
|
519
|
+
same_pack.pack_instance_id = inst.id
|
|
520
|
+
inst_id_to_req[inst.id] = pack
|
|
521
|
+
return inst_id_to_req
|
|
522
|
+
|
|
472
523
|
def register_udfs(self, odps_ctx: "ODPSSessionContext"):
|
|
473
524
|
for udf in self._context.get_udfs():
|
|
474
|
-
logger.info("[Session
|
|
525
|
+
logger.info("[Session=%s] Registering UDF %s", self._session_id, udf.name)
|
|
475
526
|
udf.register(odps_ctx, True)
|
|
476
527
|
|
|
477
528
|
def unregister_udfs(self, odps_ctx: "ODPSSessionContext"):
|
|
478
529
|
for udf in self._context.get_udfs():
|
|
479
|
-
logger.info("[Session
|
|
530
|
+
logger.info("[Session=%s] Unregistering UDF %s", self._session_id, udf.name)
|
|
480
531
|
udf.unregister(odps_ctx)
|
|
481
532
|
|
|
482
533
|
def get_udfs(self) -> List[AbstractUDF]:
|
maxframe/config/config.py
CHANGED
|
@@ -19,27 +19,40 @@ import warnings
|
|
|
19
19
|
from copy import deepcopy
|
|
20
20
|
from typing import Any, Dict, Optional, Union
|
|
21
21
|
|
|
22
|
+
from odps.lib import tzlocal
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from zoneinfo import available_timezones
|
|
26
|
+
except ImportError:
|
|
27
|
+
from pytz import all_timezones
|
|
28
|
+
|
|
29
|
+
available_timezones = lambda: all_timezones
|
|
30
|
+
|
|
22
31
|
from ..utils import get_python_tag
|
|
23
32
|
from .validators import (
|
|
24
33
|
ValidatorType,
|
|
25
34
|
all_validator,
|
|
26
|
-
any_validator,
|
|
27
35
|
is_bool,
|
|
28
36
|
is_dict,
|
|
29
37
|
is_in,
|
|
30
38
|
is_integer,
|
|
39
|
+
is_non_negative_integer,
|
|
31
40
|
is_null,
|
|
32
41
|
is_numeric,
|
|
33
42
|
is_string,
|
|
43
|
+
is_valid_cache_path,
|
|
34
44
|
)
|
|
35
45
|
|
|
36
46
|
_DEFAULT_REDIRECT_WARN = "Option {source} has been replaced by {target} and might be removed in a future release."
|
|
37
47
|
_DEFAULT_MAX_ALIVE_SECONDS = 3 * 24 * 3600
|
|
38
48
|
_DEFAULT_MAX_IDLE_SECONDS = 3600
|
|
39
49
|
_DEFAULT_SPE_OPERATION_TIMEOUT_SECONDS = 120
|
|
50
|
+
_DEFAULT_SPE_FAILURE_RETRY_TIMES = 5
|
|
40
51
|
_DEFAULT_UPLOAD_BATCH_SIZE = 4096
|
|
41
52
|
_DEFAULT_TEMP_LIFECYCLE = 1
|
|
42
53
|
_DEFAULT_TASK_START_TIMEOUT = 60
|
|
54
|
+
_DEFAULT_TASK_RESTART_TIMEOUT = 300
|
|
55
|
+
_DEFAULT_LOGVIEW_HOURS = 24 * 30
|
|
43
56
|
|
|
44
57
|
|
|
45
58
|
class OptionError(Exception):
|
|
@@ -295,23 +308,60 @@ class Config:
|
|
|
295
308
|
return {k: v for k, v in res.items() if k in self._remote_options}
|
|
296
309
|
|
|
297
310
|
|
|
298
|
-
|
|
311
|
+
def _get_legal_local_tz_name() -> Optional[str]:
|
|
312
|
+
"""Sometimes we may get illegal tz name from tzlocal.get_localzone()"""
|
|
313
|
+
tz_name = str(tzlocal.get_localzone())
|
|
314
|
+
if tz_name not in available_timezones():
|
|
315
|
+
return None
|
|
316
|
+
return tz_name
|
|
317
|
+
|
|
299
318
|
|
|
319
|
+
default_options = Config()
|
|
300
320
|
default_options.register_option(
|
|
301
321
|
"execution_mode", "trigger", validator=is_in(["trigger", "eager"])
|
|
302
322
|
)
|
|
323
|
+
default_options.register_option("use_common_table", False, validator=is_bool)
|
|
303
324
|
default_options.register_option(
|
|
304
325
|
"python_tag", get_python_tag(), validator=is_string, remote=True
|
|
305
326
|
)
|
|
327
|
+
default_options.register_option(
|
|
328
|
+
"local_timezone",
|
|
329
|
+
_get_legal_local_tz_name(),
|
|
330
|
+
validator=is_null | is_in(set(available_timezones())),
|
|
331
|
+
remote=True,
|
|
332
|
+
)
|
|
333
|
+
default_options.register_option(
|
|
334
|
+
"session.logview_hours", _DEFAULT_LOGVIEW_HOURS, validator=is_integer, remote=True
|
|
335
|
+
)
|
|
306
336
|
default_options.register_option(
|
|
307
337
|
"client.task_start_timeout", _DEFAULT_TASK_START_TIMEOUT, validator=is_integer
|
|
308
338
|
)
|
|
339
|
+
default_options.register_option(
|
|
340
|
+
"client.task_restart_timeout", _DEFAULT_TASK_RESTART_TIMEOUT, validator=is_integer
|
|
341
|
+
)
|
|
309
342
|
default_options.register_option("sql.enable_mcqa", True, validator=is_bool, remote=True)
|
|
310
343
|
default_options.register_option(
|
|
311
344
|
"sql.generate_comments", True, validator=is_bool, remote=True
|
|
312
345
|
)
|
|
346
|
+
default_options.register_option(
|
|
347
|
+
"sql.auto_use_common_image", True, validator=is_bool, remote=True
|
|
348
|
+
)
|
|
313
349
|
default_options.register_option("sql.settings", {}, validator=is_dict, remote=True)
|
|
314
350
|
|
|
351
|
+
default_options.register_option("is_production", False, validator=is_bool, remote=True)
|
|
352
|
+
default_options.register_option("schedule_id", "", validator=is_string, remote=True)
|
|
353
|
+
|
|
354
|
+
default_options.register_option(
|
|
355
|
+
"service_role_arn", None, validator=is_null | is_string, remote=True
|
|
356
|
+
)
|
|
357
|
+
default_options.register_option(
|
|
358
|
+
"object_cache_url", None, validator=is_null | is_valid_cache_path, remote=True
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
default_options.register_option(
|
|
362
|
+
"chunk_size", None, validator=is_null | is_integer, remote=True
|
|
363
|
+
)
|
|
364
|
+
|
|
315
365
|
default_options.register_option(
|
|
316
366
|
"session.max_alive_seconds",
|
|
317
367
|
_DEFAULT_MAX_ALIVE_SECONDS,
|
|
@@ -324,15 +374,25 @@ default_options.register_option(
|
|
|
324
374
|
validator=is_numeric,
|
|
325
375
|
remote=True,
|
|
326
376
|
)
|
|
377
|
+
default_options.register_option(
|
|
378
|
+
"session.quota_name", None, validator=is_null | is_string, remote=True
|
|
379
|
+
)
|
|
380
|
+
default_options.register_option(
|
|
381
|
+
"session.enable_schema", None, validator=is_null | is_bool, remote=True
|
|
382
|
+
)
|
|
383
|
+
default_options.register_option(
|
|
384
|
+
"session.enable_high_availability", None, validator=is_null | is_bool, remote=True
|
|
385
|
+
)
|
|
386
|
+
default_options.register_option(
|
|
387
|
+
"session.default_schema", None, validator=is_null | is_string, remote=True
|
|
388
|
+
)
|
|
327
389
|
default_options.register_option(
|
|
328
390
|
"session.upload_batch_size",
|
|
329
391
|
_DEFAULT_UPLOAD_BATCH_SIZE,
|
|
330
392
|
validator=is_integer,
|
|
331
393
|
)
|
|
332
394
|
default_options.register_option(
|
|
333
|
-
"session.table_lifecycle",
|
|
334
|
-
None,
|
|
335
|
-
validator=any_validator(is_null, is_integer),
|
|
395
|
+
"session.table_lifecycle", None, validator=is_null | is_integer, remote=True
|
|
336
396
|
)
|
|
337
397
|
default_options.register_option(
|
|
338
398
|
"session.temp_table_lifecycle",
|
|
@@ -343,7 +403,7 @@ default_options.register_option(
|
|
|
343
403
|
default_options.register_option(
|
|
344
404
|
"session.subinstance_priority",
|
|
345
405
|
None,
|
|
346
|
-
validator=
|
|
406
|
+
validator=is_null | is_integer,
|
|
347
407
|
remote=True,
|
|
348
408
|
)
|
|
349
409
|
|
|
@@ -355,9 +415,7 @@ default_options.register_option(
|
|
|
355
415
|
default_options.register_option(
|
|
356
416
|
"optimize.head_optimize_threshold", 1000, validator=is_integer
|
|
357
417
|
)
|
|
358
|
-
default_options.register_option(
|
|
359
|
-
"show_progress", "auto", validator=any_validator(is_bool, is_string)
|
|
360
|
-
)
|
|
418
|
+
default_options.register_option("show_progress", "auto", validator=is_bool | is_string)
|
|
361
419
|
default_options.register_option(
|
|
362
420
|
"dag.settings", value=dict(), validator=is_dict, remote=True
|
|
363
421
|
)
|
|
@@ -368,7 +426,13 @@ default_options.register_option(
|
|
|
368
426
|
default_options.register_option(
|
|
369
427
|
"spe.operation_timeout_seconds",
|
|
370
428
|
_DEFAULT_SPE_OPERATION_TIMEOUT_SECONDS,
|
|
371
|
-
validator=
|
|
429
|
+
validator=is_non_negative_integer,
|
|
430
|
+
remote=True,
|
|
431
|
+
)
|
|
432
|
+
default_options.register_option(
|
|
433
|
+
"spe.failure_retry_times",
|
|
434
|
+
_DEFAULT_SPE_FAILURE_RETRY_TIMES,
|
|
435
|
+
validator=is_non_negative_integer,
|
|
372
436
|
remote=True,
|
|
373
437
|
)
|
|
374
438
|
|
|
@@ -376,6 +440,10 @@ default_options.register_option(
|
|
|
376
440
|
"spe.task.settings", dict(), validator=is_dict, remote=True
|
|
377
441
|
)
|
|
378
442
|
|
|
443
|
+
default_options.register_option(
|
|
444
|
+
"pythonpack.task.settings", {}, validator=is_dict, remote=True
|
|
445
|
+
)
|
|
446
|
+
|
|
379
447
|
_options_ctx_var = contextvars.ContextVar("_options_ctx_var")
|
|
380
448
|
|
|
381
449
|
|
maxframe/config/validators.py
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from typing import Callable
|
|
16
|
+
from urllib.parse import urlparse
|
|
16
17
|
|
|
17
18
|
ValidatorType = Callable[..., bool]
|
|
18
19
|
|
|
@@ -32,21 +33,51 @@ def all_validator(*validators: ValidatorType):
|
|
|
32
33
|
return validate
|
|
33
34
|
|
|
34
35
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
36
|
+
class Validator:
|
|
37
|
+
def __init__(self, func: ValidatorType):
|
|
38
|
+
self._func = func
|
|
39
|
+
|
|
40
|
+
def __call__(self, arg) -> bool:
|
|
41
|
+
return self._func(arg)
|
|
42
|
+
|
|
43
|
+
def __or__(self, other):
|
|
44
|
+
return OrValidator(self, other)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class OrValidator(Validator):
|
|
48
|
+
def __init__(self, lhs: Validator, rhs: Validator):
|
|
49
|
+
super().__init__(lambda x: lhs(x) or rhs(x))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
is_null = Validator(lambda x: x is None)
|
|
53
|
+
is_bool = Validator(lambda x: isinstance(x, bool))
|
|
54
|
+
is_float = Validator(lambda x: isinstance(x, float))
|
|
55
|
+
is_integer = Validator(lambda x: isinstance(x, int))
|
|
56
|
+
is_numeric = Validator(lambda x: isinstance(x, (int, float)))
|
|
57
|
+
is_string = Validator(lambda x: isinstance(x, str))
|
|
58
|
+
is_dict = Validator(lambda x: isinstance(x, dict))
|
|
59
|
+
is_positive_integer = Validator(lambda x: is_integer(x) and x > 0)
|
|
60
|
+
is_non_negative_integer = Validator(lambda x: is_integer(x) and x >= 0)
|
|
43
61
|
|
|
44
62
|
|
|
45
63
|
def is_in(vals):
|
|
46
|
-
|
|
47
|
-
return x in vals
|
|
64
|
+
return Validator(vals.__contains__)
|
|
48
65
|
|
|
49
|
-
|
|
66
|
+
|
|
67
|
+
def _is_valid_cache_path(path: str) -> bool:
|
|
68
|
+
"""
|
|
69
|
+
path should look like oss://oss_endpoint/oss_bucket/path
|
|
70
|
+
"""
|
|
71
|
+
parsed_url = urlparse(path)
|
|
72
|
+
return (
|
|
73
|
+
parsed_url.scheme == "oss"
|
|
74
|
+
and parsed_url.netloc
|
|
75
|
+
and parsed_url.path
|
|
76
|
+
and "/" in parsed_url.path
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
is_valid_cache_path = Validator(_is_valid_cache_path)
|
|
50
81
|
|
|
51
82
|
|
|
52
83
|
_invalid_char_in_yaml_str = {'"', "'", "\n", "\\"}
|
maxframe/conftest.py
CHANGED
|
@@ -14,10 +14,13 @@
|
|
|
14
14
|
|
|
15
15
|
import faulthandler
|
|
16
16
|
import os
|
|
17
|
-
from configparser import ConfigParser, NoOptionError
|
|
17
|
+
from configparser import ConfigParser, NoOptionError, NoSectionError
|
|
18
18
|
|
|
19
19
|
import pytest
|
|
20
20
|
from odps import ODPS
|
|
21
|
+
from odps.accounts import BearerTokenAccount
|
|
22
|
+
|
|
23
|
+
from .config import options
|
|
21
24
|
|
|
22
25
|
faulthandler.enable(all_threads=True)
|
|
23
26
|
_test_conf_file_name = os.path.join(
|
|
@@ -32,12 +35,23 @@ def test_config():
|
|
|
32
35
|
return config
|
|
33
36
|
|
|
34
37
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
38
|
+
def _get_odps_env(test_config: ConfigParser, section_name: str) -> ODPS:
|
|
39
|
+
try:
|
|
40
|
+
access_id = test_config.get(section_name, "access_id")
|
|
41
|
+
except NoOptionError:
|
|
42
|
+
access_id = test_config.get("odps", "access_id")
|
|
43
|
+
try:
|
|
44
|
+
secret_access_key = test_config.get(section_name, "secret_access_key")
|
|
45
|
+
except NoOptionError:
|
|
46
|
+
secret_access_key = test_config.get("odps", "secret_access_key")
|
|
47
|
+
try:
|
|
48
|
+
project = test_config.get(section_name, "project")
|
|
49
|
+
except NoOptionError:
|
|
50
|
+
project = test_config.get("odps", "project")
|
|
51
|
+
try:
|
|
52
|
+
endpoint = test_config.get(section_name, "endpoint")
|
|
53
|
+
except NoOptionError:
|
|
54
|
+
endpoint = test_config.get("odps", "endpoint")
|
|
41
55
|
try:
|
|
42
56
|
tunnel_endpoint = test_config.get("odps", "tunnel_endpoint")
|
|
43
57
|
except NoOptionError:
|
|
@@ -53,12 +67,31 @@ def odps_envs(test_config):
|
|
|
53
67
|
],
|
|
54
68
|
}
|
|
55
69
|
token = entry.get_project().generate_auth_token(policy, "bearer", 5)
|
|
70
|
+
return ODPS(
|
|
71
|
+
account=BearerTokenAccount(token, 5),
|
|
72
|
+
project=project,
|
|
73
|
+
endpoint=endpoint,
|
|
74
|
+
tunnel_endpoint=tunnel_endpoint,
|
|
75
|
+
)
|
|
56
76
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
77
|
+
|
|
78
|
+
@pytest.fixture(scope="session")
|
|
79
|
+
def odps_with_schema(test_config):
|
|
80
|
+
try:
|
|
81
|
+
return _get_odps_env(test_config, "odps_with_schema")
|
|
82
|
+
except NoSectionError:
|
|
83
|
+
pytest.skip("Need to specify odps_with_schema section in test.conf")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@pytest.fixture(scope="session", autouse=True)
|
|
87
|
+
def odps_envs(test_config):
|
|
88
|
+
entry = _get_odps_env(test_config, "odps")
|
|
89
|
+
|
|
90
|
+
os.environ["ODPS_BEARER_TOKEN"] = entry.account.token
|
|
91
|
+
os.environ["ODPS_PROJECT_NAME"] = entry.project
|
|
92
|
+
os.environ["ODPS_ENDPOINT"] = entry.endpoint
|
|
93
|
+
if entry.tunnel_endpoint:
|
|
94
|
+
os.environ["ODPS_TUNNEL_ENDPOINT"] = entry.tunnel_endpoint
|
|
62
95
|
|
|
63
96
|
try:
|
|
64
97
|
yield
|
|
@@ -77,16 +110,23 @@ def odps_envs(test_config):
|
|
|
77
110
|
pass
|
|
78
111
|
|
|
79
112
|
|
|
80
|
-
@pytest.fixture
|
|
113
|
+
@pytest.fixture(scope="session")
|
|
81
114
|
def oss_config():
|
|
82
115
|
config = ConfigParser()
|
|
83
116
|
config.read(_test_conf_file_name)
|
|
84
117
|
|
|
118
|
+
old_role_arn = options.service_role_arn
|
|
119
|
+
old_cache_url = options.object_cache_url
|
|
120
|
+
|
|
85
121
|
try:
|
|
86
122
|
oss_access_id = config.get("oss", "access_id")
|
|
87
123
|
oss_secret_access_key = config.get("oss", "secret_access_key")
|
|
88
124
|
oss_bucket_name = config.get("oss", "bucket_name")
|
|
89
125
|
oss_endpoint = config.get("oss", "endpoint")
|
|
126
|
+
oss_rolearn = config.get("oss", "rolearn")
|
|
127
|
+
|
|
128
|
+
options.service_role_arn = oss_rolearn
|
|
129
|
+
options.object_cache_url = f"oss://{oss_endpoint}/{oss_bucket_name}"
|
|
90
130
|
|
|
91
131
|
config.oss_config = (
|
|
92
132
|
oss_access_id,
|
|
@@ -99,9 +139,13 @@ def oss_config():
|
|
|
99
139
|
|
|
100
140
|
auth = oss2.Auth(oss_access_id, oss_secret_access_key)
|
|
101
141
|
config.oss_bucket = oss2.Bucket(auth, oss_endpoint, oss_bucket_name)
|
|
102
|
-
|
|
142
|
+
config.oss_rolearn = oss_rolearn
|
|
143
|
+
yield config
|
|
103
144
|
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError, ImportError):
|
|
104
145
|
return None
|
|
146
|
+
finally:
|
|
147
|
+
options.service_role_arn = old_role_arn
|
|
148
|
+
options.object_cache_url = old_cache_url
|
|
105
149
|
|
|
106
150
|
|
|
107
151
|
@pytest.fixture(autouse=True)
|
maxframe/core/__init__.py
CHANGED
|
@@ -14,27 +14,18 @@
|
|
|
14
14
|
|
|
15
15
|
# noinspection PyUnresolvedReferences
|
|
16
16
|
from ..typing_ import ChunkType, EntityType, OperatorType, TileableType
|
|
17
|
-
from .base import ExecutionError
|
|
17
|
+
from .base import Base, ExecutionError
|
|
18
18
|
from .entity import (
|
|
19
|
-
CHUNK_TYPE,
|
|
20
19
|
ENTITY_TYPE,
|
|
21
|
-
FUSE_CHUNK_TYPE,
|
|
22
|
-
OBJECT_CHUNK_TYPE,
|
|
23
20
|
OBJECT_TYPE,
|
|
24
21
|
TILEABLE_TYPE,
|
|
25
|
-
Chunk,
|
|
26
|
-
ChunkData,
|
|
27
22
|
Entity,
|
|
28
23
|
EntityData,
|
|
29
24
|
ExecutableTuple,
|
|
30
|
-
FuseChunk,
|
|
31
|
-
FuseChunkData,
|
|
32
25
|
HasShapeTileable,
|
|
33
26
|
HasShapeTileableData,
|
|
34
27
|
NotSupportTile,
|
|
35
28
|
Object,
|
|
36
|
-
ObjectChunk,
|
|
37
|
-
ObjectChunkData,
|
|
38
29
|
ObjectData,
|
|
39
30
|
OutputType,
|
|
40
31
|
Tileable,
|
|
@@ -43,23 +34,18 @@ from .entity import (
|
|
|
43
34
|
get_fetch_class,
|
|
44
35
|
get_output_types,
|
|
45
36
|
get_tileable_types,
|
|
46
|
-
register,
|
|
47
37
|
register_fetch_class,
|
|
48
38
|
register_output_types,
|
|
49
|
-
unregister,
|
|
50
39
|
)
|
|
51
40
|
|
|
52
41
|
# noinspection PyUnresolvedReferences
|
|
53
42
|
from .graph import (
|
|
54
43
|
DAG,
|
|
55
|
-
ChunkGraph,
|
|
56
|
-
ChunkGraphBuilder,
|
|
57
44
|
DirectedGraph,
|
|
58
45
|
GraphContainsCycleError,
|
|
59
46
|
GraphSerializer,
|
|
60
47
|
TileableGraph,
|
|
61
48
|
TileableGraphBuilder,
|
|
62
|
-
TileContext,
|
|
63
|
-
TileStatus,
|
|
64
49
|
)
|
|
65
50
|
from .mode import enter_mode, is_build_mode, is_eager_mode, is_kernel_mode
|
|
51
|
+
from .operator import build_fetch
|
maxframe/core/entity/__init__.py
CHANGED
|
@@ -12,18 +12,9 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from .chunks import CHUNK_TYPE, Chunk, ChunkData
|
|
16
15
|
from .core import ENTITY_TYPE, Entity, EntityData
|
|
17
16
|
from .executable import ExecutableTuple, _ExecuteAndFetchMixin
|
|
18
|
-
from .
|
|
19
|
-
from .objects import (
|
|
20
|
-
OBJECT_CHUNK_TYPE,
|
|
21
|
-
OBJECT_TYPE,
|
|
22
|
-
Object,
|
|
23
|
-
ObjectChunk,
|
|
24
|
-
ObjectChunkData,
|
|
25
|
-
ObjectData,
|
|
26
|
-
)
|
|
17
|
+
from .objects import OBJECT_TYPE, Object, ObjectData
|
|
27
18
|
from .output_types import (
|
|
28
19
|
OutputType,
|
|
29
20
|
get_fetch_class,
|
|
@@ -39,6 +30,4 @@ from .tileables import (
|
|
|
39
30
|
NotSupportTile,
|
|
40
31
|
Tileable,
|
|
41
32
|
TileableData,
|
|
42
|
-
register,
|
|
43
|
-
unregister,
|
|
44
33
|
)
|