maxframe 0.1.0b5__cp38-cp38-win32.whl → 1.0.0__cp38-cp38-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of maxframe might be problematic. Click here for more details.
- maxframe/_utils.cp38-win32.pyd +0 -0
- maxframe/codegen.py +10 -4
- maxframe/config/config.py +68 -10
- maxframe/config/validators.py +42 -11
- maxframe/conftest.py +58 -14
- maxframe/core/__init__.py +2 -16
- maxframe/core/entity/__init__.py +1 -12
- maxframe/core/entity/executable.py +1 -1
- maxframe/core/entity/objects.py +46 -45
- maxframe/core/entity/output_types.py +0 -3
- maxframe/core/entity/tests/test_objects.py +43 -0
- maxframe/core/entity/tileables.py +5 -78
- maxframe/core/graph/__init__.py +2 -2
- maxframe/core/graph/builder/__init__.py +0 -1
- maxframe/core/graph/builder/base.py +5 -4
- maxframe/core/graph/builder/tileable.py +4 -4
- maxframe/core/graph/builder/utils.py +4 -8
- maxframe/core/graph/core.cp38-win32.pyd +0 -0
- maxframe/core/graph/core.pyx +4 -4
- maxframe/core/graph/entity.py +9 -33
- maxframe/core/operator/__init__.py +2 -9
- maxframe/core/operator/base.py +3 -5
- maxframe/core/operator/objects.py +0 -9
- maxframe/core/operator/utils.py +55 -0
- maxframe/dataframe/__init__.py +1 -1
- maxframe/dataframe/arithmetic/around.py +5 -17
- maxframe/dataframe/arithmetic/core.py +15 -7
- maxframe/dataframe/arithmetic/docstring.py +7 -33
- maxframe/dataframe/arithmetic/equal.py +4 -2
- maxframe/dataframe/arithmetic/greater.py +4 -2
- maxframe/dataframe/arithmetic/greater_equal.py +4 -2
- maxframe/dataframe/arithmetic/less.py +2 -2
- maxframe/dataframe/arithmetic/less_equal.py +4 -2
- maxframe/dataframe/arithmetic/not_equal.py +4 -2
- maxframe/dataframe/arithmetic/tests/test_arithmetic.py +39 -16
- maxframe/dataframe/core.py +31 -7
- maxframe/dataframe/datasource/date_range.py +2 -2
- maxframe/dataframe/datasource/read_odps_query.py +117 -23
- maxframe/dataframe/datasource/read_odps_table.py +6 -3
- maxframe/dataframe/datasource/tests/test_datasource.py +103 -8
- maxframe/dataframe/datastore/tests/test_to_odps.py +48 -0
- maxframe/dataframe/datastore/to_odps.py +28 -0
- maxframe/dataframe/extensions/__init__.py +5 -0
- maxframe/dataframe/extensions/flatjson.py +131 -0
- maxframe/dataframe/extensions/flatmap.py +317 -0
- maxframe/dataframe/extensions/reshuffle.py +1 -1
- maxframe/dataframe/extensions/tests/test_extensions.py +108 -3
- maxframe/dataframe/groupby/core.py +1 -1
- maxframe/dataframe/groupby/cum.py +0 -1
- maxframe/dataframe/groupby/fill.py +4 -1
- maxframe/dataframe/groupby/getitem.py +6 -0
- maxframe/dataframe/groupby/tests/test_groupby.py +5 -1
- maxframe/dataframe/groupby/transform.py +5 -1
- maxframe/dataframe/indexing/align.py +1 -1
- maxframe/dataframe/indexing/loc.py +6 -4
- maxframe/dataframe/indexing/rename.py +5 -28
- maxframe/dataframe/indexing/sample.py +0 -1
- maxframe/dataframe/indexing/set_index.py +68 -1
- maxframe/dataframe/initializer.py +11 -1
- maxframe/dataframe/merge/__init__.py +9 -1
- maxframe/dataframe/merge/concat.py +41 -31
- maxframe/dataframe/merge/merge.py +237 -3
- maxframe/dataframe/merge/tests/test_merge.py +126 -1
- maxframe/dataframe/misc/apply.py +5 -10
- maxframe/dataframe/misc/case_when.py +1 -1
- maxframe/dataframe/misc/describe.py +2 -2
- maxframe/dataframe/misc/drop_duplicates.py +8 -8
- maxframe/dataframe/misc/eval.py +4 -0
- maxframe/dataframe/misc/memory_usage.py +2 -2
- maxframe/dataframe/misc/pct_change.py +1 -83
- maxframe/dataframe/misc/tests/test_misc.py +33 -2
- maxframe/dataframe/misc/transform.py +1 -30
- maxframe/dataframe/misc/value_counts.py +4 -17
- maxframe/dataframe/missing/dropna.py +1 -1
- maxframe/dataframe/missing/fillna.py +5 -5
- maxframe/dataframe/operators.py +1 -17
- maxframe/dataframe/reduction/core.py +2 -2
- maxframe/dataframe/reduction/tests/test_reduction.py +2 -4
- maxframe/dataframe/sort/sort_values.py +1 -11
- maxframe/dataframe/statistics/corr.py +3 -3
- maxframe/dataframe/statistics/quantile.py +13 -19
- maxframe/dataframe/statistics/tests/test_statistics.py +4 -4
- maxframe/dataframe/tests/test_initializer.py +33 -2
- maxframe/dataframe/utils.py +26 -11
- maxframe/dataframe/window/expanding.py +5 -3
- maxframe/dataframe/window/tests/test_expanding.py +2 -2
- maxframe/errors.py +13 -0
- maxframe/extension.py +12 -0
- maxframe/io/__init__.py +13 -0
- maxframe/io/objects/__init__.py +24 -0
- maxframe/io/objects/core.py +140 -0
- maxframe/io/objects/tensor.py +76 -0
- maxframe/io/objects/tests/__init__.py +13 -0
- maxframe/io/objects/tests/test_object_io.py +97 -0
- maxframe/{odpsio → io/odpsio}/__init__.py +3 -1
- maxframe/{odpsio → io/odpsio}/arrow.py +42 -10
- maxframe/{odpsio → io/odpsio}/schema.py +38 -16
- maxframe/io/odpsio/tableio.py +719 -0
- maxframe/io/odpsio/tests/__init__.py +13 -0
- maxframe/{odpsio → io/odpsio}/tests/test_schema.py +59 -22
- maxframe/{odpsio → io/odpsio}/tests/test_tableio.py +50 -23
- maxframe/{odpsio → io/odpsio}/tests/test_volumeio.py +4 -6
- maxframe/io/odpsio/volumeio.py +63 -0
- maxframe/learn/contrib/__init__.py +3 -1
- maxframe/learn/contrib/graph/__init__.py +15 -0
- maxframe/learn/contrib/graph/connected_components.py +215 -0
- maxframe/learn/contrib/graph/tests/__init__.py +13 -0
- maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
- maxframe/learn/contrib/llm/__init__.py +16 -0
- maxframe/learn/contrib/llm/core.py +54 -0
- maxframe/learn/contrib/llm/models/__init__.py +14 -0
- maxframe/learn/contrib/llm/models/dashscope.py +73 -0
- maxframe/learn/contrib/llm/multi_modal.py +42 -0
- maxframe/learn/contrib/llm/text.py +42 -0
- maxframe/learn/contrib/xgboost/classifier.py +26 -2
- maxframe/learn/contrib/xgboost/core.py +87 -2
- maxframe/learn/contrib/xgboost/dmatrix.py +3 -6
- maxframe/learn/contrib/xgboost/predict.py +29 -46
- maxframe/learn/contrib/xgboost/regressor.py +3 -10
- maxframe/learn/contrib/xgboost/train.py +29 -18
- maxframe/{core/operator/fuse.py → learn/core.py} +7 -10
- maxframe/lib/mmh3.cp38-win32.pyd +0 -0
- maxframe/lib/mmh3.pyi +43 -0
- maxframe/lib/sparse/tests/test_sparse.py +15 -15
- maxframe/lib/wrapped_pickle.py +2 -1
- maxframe/opcodes.py +8 -0
- maxframe/protocol.py +154 -27
- maxframe/remote/core.py +4 -8
- maxframe/serialization/__init__.py +1 -0
- maxframe/serialization/core.cp38-win32.pyd +0 -0
- maxframe/serialization/core.pxd +3 -0
- maxframe/serialization/core.pyi +3 -0
- maxframe/serialization/core.pyx +67 -26
- maxframe/serialization/exception.py +1 -1
- maxframe/serialization/pandas.py +52 -17
- maxframe/serialization/serializables/core.py +180 -15
- maxframe/serialization/serializables/field_type.py +4 -1
- maxframe/serialization/serializables/tests/test_serializable.py +54 -5
- maxframe/serialization/tests/test_serial.py +2 -1
- maxframe/session.py +9 -2
- maxframe/tensor/__init__.py +81 -2
- maxframe/tensor/arithmetic/isclose.py +1 -0
- maxframe/tensor/arithmetic/tests/test_arithmetic.py +22 -18
- maxframe/tensor/core.py +5 -136
- maxframe/tensor/datasource/array.py +3 -0
- maxframe/tensor/datasource/full.py +1 -1
- maxframe/tensor/datasource/tests/test_datasource.py +1 -1
- maxframe/tensor/indexing/flatnonzero.py +1 -1
- maxframe/tensor/indexing/getitem.py +2 -0
- maxframe/tensor/merge/__init__.py +2 -0
- maxframe/tensor/merge/concatenate.py +101 -0
- maxframe/tensor/merge/tests/test_merge.py +30 -1
- maxframe/tensor/merge/vstack.py +74 -0
- maxframe/tensor/{base → misc}/__init__.py +2 -0
- maxframe/tensor/{base → misc}/atleast_1d.py +1 -3
- maxframe/tensor/misc/atleast_2d.py +70 -0
- maxframe/tensor/misc/atleast_3d.py +85 -0
- maxframe/tensor/misc/tests/__init__.py +13 -0
- maxframe/tensor/{base → misc}/transpose.py +22 -18
- maxframe/tensor/{base → misc}/unique.py +3 -3
- maxframe/tensor/operators.py +1 -7
- maxframe/tensor/random/core.py +1 -1
- maxframe/tensor/reduction/count_nonzero.py +2 -1
- maxframe/tensor/reduction/mean.py +1 -0
- maxframe/tensor/reduction/nanmean.py +1 -0
- maxframe/tensor/reduction/nanvar.py +2 -0
- maxframe/tensor/reduction/tests/test_reduction.py +12 -1
- maxframe/tensor/reduction/var.py +2 -0
- maxframe/tensor/statistics/quantile.py +2 -2
- maxframe/tensor/utils.py +2 -22
- maxframe/tests/test_protocol.py +34 -0
- maxframe/tests/test_utils.py +0 -12
- maxframe/tests/utils.py +17 -2
- maxframe/typing_.py +4 -1
- maxframe/udf.py +8 -9
- maxframe/utils.py +106 -86
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/METADATA +25 -25
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/RECORD +197 -173
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/WHEEL +1 -1
- maxframe_client/__init__.py +0 -1
- maxframe_client/clients/framedriver.py +4 -1
- maxframe_client/fetcher.py +81 -74
- maxframe_client/session/consts.py +3 -0
- maxframe_client/session/graph.py +8 -2
- maxframe_client/session/odps.py +194 -40
- maxframe_client/session/task.py +94 -39
- maxframe_client/tests/test_fetcher.py +21 -3
- maxframe_client/tests/test_session.py +109 -8
- maxframe/core/entity/chunks.py +0 -68
- maxframe/core/entity/fuse.py +0 -73
- maxframe/core/graph/builder/chunk.py +0 -430
- maxframe/odpsio/tableio.py +0 -322
- maxframe/odpsio/volumeio.py +0 -95
- maxframe_client/clients/spe.py +0 -104
- /maxframe/{odpsio → core/entity}/tests/__init__.py +0 -0
- /maxframe/{tensor/base → dataframe/datastore}/tests/__init__.py +0 -0
- /maxframe/{odpsio → io/odpsio}/tests/test_arrow.py +0 -0
- /maxframe/tensor/{base → misc}/astype.py +0 -0
- /maxframe/tensor/{base → misc}/broadcast_to.py +0 -0
- /maxframe/tensor/{base → misc}/ravel.py +0 -0
- /maxframe/tensor/{base/tests/test_base.py → misc/tests/test_misc.py} +0 -0
- /maxframe/tensor/{base → misc}/where.py +0 -0
- {maxframe-0.1.0b5.dist-info → maxframe-1.0.0.dist-info}/top_level.txt +0 -0
maxframe/_utils.cp38-win32.pyd
CHANGED
|
Binary file
|
maxframe/codegen.py
CHANGED
|
@@ -26,9 +26,9 @@ from odps.utils import camel_to_underline
|
|
|
26
26
|
from .core import OperatorType, Tileable, TileableGraph
|
|
27
27
|
from .core.operator import Fetch
|
|
28
28
|
from .extension import iter_extensions
|
|
29
|
+
from .io.odpsio import build_dataframe_table_meta
|
|
30
|
+
from .io.odpsio.schema import pandas_to_odps_schema
|
|
29
31
|
from .lib import wrapped_pickle as pickle
|
|
30
|
-
from .odpsio import build_dataframe_table_meta
|
|
31
|
-
from .odpsio.schema import pandas_to_odps_schema
|
|
32
32
|
from .protocol import DataFrameTableMeta, ResultInfo
|
|
33
33
|
from .serialization import PickleContainer
|
|
34
34
|
from .serialization.serializables import Serializable, StringField
|
|
@@ -86,6 +86,8 @@ class AbstractUDF(Serializable):
|
|
|
86
86
|
|
|
87
87
|
|
|
88
88
|
class UserCodeMixin:
|
|
89
|
+
__slots__ = ()
|
|
90
|
+
|
|
89
91
|
@classmethod
|
|
90
92
|
def obj_to_python_expr(cls, obj: Any = None) -> str:
|
|
91
93
|
"""
|
|
@@ -344,6 +346,9 @@ def register_engine_codegen(type_: Type["BigDagCodeGenerator"]):
|
|
|
344
346
|
BUILTIN_ENGINE_SPE = "SPE"
|
|
345
347
|
BUILTIN_ENGINE_MCSQL = "MCSQL"
|
|
346
348
|
|
|
349
|
+
FAST_RANGE_INDEX_ENABLED = "codegen.fast_range_index_enabled"
|
|
350
|
+
ROW_NUMBER_WINDOW_INDEX_ENABLED = "codegen.row_number_window_index_enabled"
|
|
351
|
+
|
|
347
352
|
|
|
348
353
|
class BigDagCodeGenerator(metaclass=abc.ABCMeta):
|
|
349
354
|
_context: BigDagCodeContext
|
|
@@ -502,6 +507,7 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
|
|
|
502
507
|
prefer_binary=pack.prefer_binary,
|
|
503
508
|
pre_release=pack.pre_release,
|
|
504
509
|
force_rebuild=pack.force_rebuild,
|
|
510
|
+
no_audit_wheel=pack.no_audit_wheel,
|
|
505
511
|
python_tag=python_tag,
|
|
506
512
|
is_production=is_production,
|
|
507
513
|
schedule_id=schedule_id,
|
|
@@ -516,12 +522,12 @@ class BigDagCodeGenerator(metaclass=abc.ABCMeta):
|
|
|
516
522
|
|
|
517
523
|
def register_udfs(self, odps_ctx: "ODPSSessionContext"):
|
|
518
524
|
for udf in self._context.get_udfs():
|
|
519
|
-
logger.info("[Session
|
|
525
|
+
logger.info("[Session=%s] Registering UDF %s", self._session_id, udf.name)
|
|
520
526
|
udf.register(odps_ctx, True)
|
|
521
527
|
|
|
522
528
|
def unregister_udfs(self, odps_ctx: "ODPSSessionContext"):
|
|
523
529
|
for udf in self._context.get_udfs():
|
|
524
|
-
logger.info("[Session
|
|
530
|
+
logger.info("[Session=%s] Unregistering UDF %s", self._session_id, udf.name)
|
|
525
531
|
udf.unregister(odps_ctx)
|
|
526
532
|
|
|
527
533
|
def get_udfs(self) -> List[AbstractUDF]:
|
maxframe/config/config.py
CHANGED
|
@@ -19,28 +19,40 @@ import warnings
|
|
|
19
19
|
from copy import deepcopy
|
|
20
20
|
from typing import Any, Dict, Optional, Union
|
|
21
21
|
|
|
22
|
+
from odps.lib import tzlocal
|
|
23
|
+
|
|
24
|
+
try:
|
|
25
|
+
from zoneinfo import available_timezones
|
|
26
|
+
except ImportError:
|
|
27
|
+
from pytz import all_timezones
|
|
28
|
+
|
|
29
|
+
available_timezones = lambda: all_timezones
|
|
30
|
+
|
|
22
31
|
from ..utils import get_python_tag
|
|
23
32
|
from .validators import (
|
|
24
33
|
ValidatorType,
|
|
25
34
|
all_validator,
|
|
26
|
-
any_validator,
|
|
27
35
|
is_bool,
|
|
28
36
|
is_dict,
|
|
29
37
|
is_in,
|
|
30
38
|
is_integer,
|
|
39
|
+
is_non_negative_integer,
|
|
31
40
|
is_null,
|
|
32
41
|
is_numeric,
|
|
33
42
|
is_string,
|
|
43
|
+
is_valid_cache_path,
|
|
34
44
|
)
|
|
35
45
|
|
|
36
46
|
_DEFAULT_REDIRECT_WARN = "Option {source} has been replaced by {target} and might be removed in a future release."
|
|
37
47
|
_DEFAULT_MAX_ALIVE_SECONDS = 3 * 24 * 3600
|
|
38
48
|
_DEFAULT_MAX_IDLE_SECONDS = 3600
|
|
39
49
|
_DEFAULT_SPE_OPERATION_TIMEOUT_SECONDS = 120
|
|
50
|
+
_DEFAULT_SPE_FAILURE_RETRY_TIMES = 5
|
|
40
51
|
_DEFAULT_UPLOAD_BATCH_SIZE = 4096
|
|
41
52
|
_DEFAULT_TEMP_LIFECYCLE = 1
|
|
42
53
|
_DEFAULT_TASK_START_TIMEOUT = 60
|
|
43
|
-
|
|
54
|
+
_DEFAULT_TASK_RESTART_TIMEOUT = 300
|
|
55
|
+
_DEFAULT_LOGVIEW_HOURS = 24 * 30
|
|
44
56
|
|
|
45
57
|
|
|
46
58
|
class OptionError(Exception):
|
|
@@ -296,28 +308,60 @@ class Config:
|
|
|
296
308
|
return {k: v for k, v in res.items() if k in self._remote_options}
|
|
297
309
|
|
|
298
310
|
|
|
311
|
+
def _get_legal_local_tz_name() -> Optional[str]:
|
|
312
|
+
"""Sometimes we may get illegal tz name from tzlocal.get_localzone()"""
|
|
313
|
+
tz_name = str(tzlocal.get_localzone())
|
|
314
|
+
if tz_name not in available_timezones():
|
|
315
|
+
return None
|
|
316
|
+
return tz_name
|
|
317
|
+
|
|
318
|
+
|
|
299
319
|
default_options = Config()
|
|
300
320
|
default_options.register_option(
|
|
301
321
|
"execution_mode", "trigger", validator=is_in(["trigger", "eager"])
|
|
302
322
|
)
|
|
323
|
+
default_options.register_option("use_common_table", False, validator=is_bool)
|
|
303
324
|
default_options.register_option(
|
|
304
325
|
"python_tag", get_python_tag(), validator=is_string, remote=True
|
|
305
326
|
)
|
|
327
|
+
default_options.register_option(
|
|
328
|
+
"local_timezone",
|
|
329
|
+
_get_legal_local_tz_name(),
|
|
330
|
+
validator=is_null | is_in(set(available_timezones())),
|
|
331
|
+
remote=True,
|
|
332
|
+
)
|
|
306
333
|
default_options.register_option(
|
|
307
334
|
"session.logview_hours", _DEFAULT_LOGVIEW_HOURS, validator=is_integer, remote=True
|
|
308
335
|
)
|
|
309
336
|
default_options.register_option(
|
|
310
337
|
"client.task_start_timeout", _DEFAULT_TASK_START_TIMEOUT, validator=is_integer
|
|
311
338
|
)
|
|
339
|
+
default_options.register_option(
|
|
340
|
+
"client.task_restart_timeout", _DEFAULT_TASK_RESTART_TIMEOUT, validator=is_integer
|
|
341
|
+
)
|
|
312
342
|
default_options.register_option("sql.enable_mcqa", True, validator=is_bool, remote=True)
|
|
313
343
|
default_options.register_option(
|
|
314
344
|
"sql.generate_comments", True, validator=is_bool, remote=True
|
|
315
345
|
)
|
|
346
|
+
default_options.register_option(
|
|
347
|
+
"sql.auto_use_common_image", True, validator=is_bool, remote=True
|
|
348
|
+
)
|
|
316
349
|
default_options.register_option("sql.settings", {}, validator=is_dict, remote=True)
|
|
317
350
|
|
|
318
351
|
default_options.register_option("is_production", False, validator=is_bool, remote=True)
|
|
319
352
|
default_options.register_option("schedule_id", "", validator=is_string, remote=True)
|
|
320
353
|
|
|
354
|
+
default_options.register_option(
|
|
355
|
+
"service_role_arn", None, validator=is_null | is_string, remote=True
|
|
356
|
+
)
|
|
357
|
+
default_options.register_option(
|
|
358
|
+
"object_cache_url", None, validator=is_null | is_valid_cache_path, remote=True
|
|
359
|
+
)
|
|
360
|
+
|
|
361
|
+
default_options.register_option(
|
|
362
|
+
"chunk_size", None, validator=is_null | is_integer, remote=True
|
|
363
|
+
)
|
|
364
|
+
|
|
321
365
|
default_options.register_option(
|
|
322
366
|
"session.max_alive_seconds",
|
|
323
367
|
_DEFAULT_MAX_ALIVE_SECONDS,
|
|
@@ -330,15 +374,25 @@ default_options.register_option(
|
|
|
330
374
|
validator=is_numeric,
|
|
331
375
|
remote=True,
|
|
332
376
|
)
|
|
377
|
+
default_options.register_option(
|
|
378
|
+
"session.quota_name", None, validator=is_null | is_string, remote=True
|
|
379
|
+
)
|
|
380
|
+
default_options.register_option(
|
|
381
|
+
"session.enable_schema", None, validator=is_null | is_bool, remote=True
|
|
382
|
+
)
|
|
383
|
+
default_options.register_option(
|
|
384
|
+
"session.enable_high_availability", None, validator=is_null | is_bool, remote=True
|
|
385
|
+
)
|
|
386
|
+
default_options.register_option(
|
|
387
|
+
"session.default_schema", None, validator=is_null | is_string, remote=True
|
|
388
|
+
)
|
|
333
389
|
default_options.register_option(
|
|
334
390
|
"session.upload_batch_size",
|
|
335
391
|
_DEFAULT_UPLOAD_BATCH_SIZE,
|
|
336
392
|
validator=is_integer,
|
|
337
393
|
)
|
|
338
394
|
default_options.register_option(
|
|
339
|
-
"session.table_lifecycle",
|
|
340
|
-
None,
|
|
341
|
-
validator=any_validator(is_null, is_integer),
|
|
395
|
+
"session.table_lifecycle", None, validator=is_null | is_integer, remote=True
|
|
342
396
|
)
|
|
343
397
|
default_options.register_option(
|
|
344
398
|
"session.temp_table_lifecycle",
|
|
@@ -349,7 +403,7 @@ default_options.register_option(
|
|
|
349
403
|
default_options.register_option(
|
|
350
404
|
"session.subinstance_priority",
|
|
351
405
|
None,
|
|
352
|
-
validator=any_validator(is_null, is_integer),
|
|
406
|
+
validator=is_null | is_integer,
|
|
353
407
|
remote=True,
|
|
354
408
|
)
|
|
355
409
|
|
|
@@ -361,9 +415,7 @@ default_options.register_option(
|
|
|
361
415
|
default_options.register_option(
|
|
362
416
|
"optimize.head_optimize_threshold", 1000, validator=is_integer
|
|
363
417
|
)
|
|
364
|
-
default_options.register_option(
|
|
365
|
-
"show_progress", "auto", validator=any_validator(is_bool, is_string)
|
|
366
|
-
)
|
|
418
|
+
default_options.register_option("show_progress", "auto", validator=is_bool | is_string)
|
|
367
419
|
default_options.register_option(
|
|
368
420
|
"dag.settings", value=dict(), validator=is_dict, remote=True
|
|
369
421
|
)
|
|
@@ -374,7 +426,13 @@ default_options.register_option(
|
|
|
374
426
|
default_options.register_option(
|
|
375
427
|
"spe.operation_timeout_seconds",
|
|
376
428
|
_DEFAULT_SPE_OPERATION_TIMEOUT_SECONDS,
|
|
377
|
-
validator=
|
|
429
|
+
validator=is_non_negative_integer,
|
|
430
|
+
remote=True,
|
|
431
|
+
)
|
|
432
|
+
default_options.register_option(
|
|
433
|
+
"spe.failure_retry_times",
|
|
434
|
+
_DEFAULT_SPE_FAILURE_RETRY_TIMES,
|
|
435
|
+
validator=is_non_negative_integer,
|
|
378
436
|
remote=True,
|
|
379
437
|
)
|
|
380
438
|
|
maxframe/config/validators.py
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
from typing import Callable
|
|
16
|
+
from urllib.parse import urlparse
|
|
16
17
|
|
|
17
18
|
ValidatorType = Callable[..., bool]
|
|
18
19
|
|
|
@@ -32,21 +33,51 @@ def all_validator(*validators: ValidatorType):
|
|
|
32
33
|
return validate
|
|
33
34
|
|
|
34
35
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
36
|
+
class Validator:
|
|
37
|
+
def __init__(self, func: ValidatorType):
|
|
38
|
+
self._func = func
|
|
39
|
+
|
|
40
|
+
def __call__(self, arg) -> bool:
|
|
41
|
+
return self._func(arg)
|
|
42
|
+
|
|
43
|
+
def __or__(self, other):
|
|
44
|
+
return OrValidator(self, other)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class OrValidator(Validator):
|
|
48
|
+
def __init__(self, lhs: Validator, rhs: Validator):
|
|
49
|
+
super().__init__(lambda x: lhs(x) or rhs(x))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
is_null = Validator(lambda x: x is None)
|
|
53
|
+
is_bool = Validator(lambda x: isinstance(x, bool))
|
|
54
|
+
is_float = Validator(lambda x: isinstance(x, float))
|
|
55
|
+
is_integer = Validator(lambda x: isinstance(x, int))
|
|
56
|
+
is_numeric = Validator(lambda x: isinstance(x, (int, float)))
|
|
57
|
+
is_string = Validator(lambda x: isinstance(x, str))
|
|
58
|
+
is_dict = Validator(lambda x: isinstance(x, dict))
|
|
59
|
+
is_positive_integer = Validator(lambda x: is_integer(x) and x > 0)
|
|
60
|
+
is_non_negative_integer = Validator(lambda x: is_integer(x) and x >= 0)
|
|
43
61
|
|
|
44
62
|
|
|
45
63
|
def is_in(vals):
|
|
46
|
-
|
|
47
|
-
return x in vals
|
|
64
|
+
return Validator(vals.__contains__)
|
|
48
65
|
|
|
49
|
-
|
|
66
|
+
|
|
67
|
+
def _is_valid_cache_path(path: str) -> bool:
|
|
68
|
+
"""
|
|
69
|
+
path should look like oss://oss_endpoint/oss_bucket/path
|
|
70
|
+
"""
|
|
71
|
+
parsed_url = urlparse(path)
|
|
72
|
+
return (
|
|
73
|
+
parsed_url.scheme == "oss"
|
|
74
|
+
and parsed_url.netloc
|
|
75
|
+
and parsed_url.path
|
|
76
|
+
and "/" in parsed_url.path
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
is_valid_cache_path = Validator(_is_valid_cache_path)
|
|
50
81
|
|
|
51
82
|
|
|
52
83
|
_invalid_char_in_yaml_str = {'"', "'", "\n", "\\"}
|
maxframe/conftest.py
CHANGED
|
@@ -14,10 +14,13 @@
|
|
|
14
14
|
|
|
15
15
|
import faulthandler
|
|
16
16
|
import os
|
|
17
|
-
from configparser import ConfigParser, NoOptionError
|
|
17
|
+
from configparser import ConfigParser, NoOptionError, NoSectionError
|
|
18
18
|
|
|
19
19
|
import pytest
|
|
20
20
|
from odps import ODPS
|
|
21
|
+
from odps.accounts import BearerTokenAccount
|
|
22
|
+
|
|
23
|
+
from .config import options
|
|
21
24
|
|
|
22
25
|
faulthandler.enable(all_threads=True)
|
|
23
26
|
_test_conf_file_name = os.path.join(
|
|
@@ -32,12 +35,23 @@ def test_config():
|
|
|
32
35
|
return config
|
|
33
36
|
|
|
34
37
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
38
|
+
def _get_odps_env(test_config: ConfigParser, section_name: str) -> ODPS:
|
|
39
|
+
try:
|
|
40
|
+
access_id = test_config.get(section_name, "access_id")
|
|
41
|
+
except NoOptionError:
|
|
42
|
+
access_id = test_config.get("odps", "access_id")
|
|
43
|
+
try:
|
|
44
|
+
secret_access_key = test_config.get(section_name, "secret_access_key")
|
|
45
|
+
except NoOptionError:
|
|
46
|
+
secret_access_key = test_config.get("odps", "secret_access_key")
|
|
47
|
+
try:
|
|
48
|
+
project = test_config.get(section_name, "project")
|
|
49
|
+
except NoOptionError:
|
|
50
|
+
project = test_config.get("odps", "project")
|
|
51
|
+
try:
|
|
52
|
+
endpoint = test_config.get(section_name, "endpoint")
|
|
53
|
+
except NoOptionError:
|
|
54
|
+
endpoint = test_config.get("odps", "endpoint")
|
|
41
55
|
try:
|
|
42
56
|
tunnel_endpoint = test_config.get("odps", "tunnel_endpoint")
|
|
43
57
|
except NoOptionError:
|
|
@@ -53,12 +67,31 @@ def odps_envs(test_config):
|
|
|
53
67
|
],
|
|
54
68
|
}
|
|
55
69
|
token = entry.get_project().generate_auth_token(policy, "bearer", 5)
|
|
70
|
+
return ODPS(
|
|
71
|
+
account=BearerTokenAccount(token, 5),
|
|
72
|
+
project=project,
|
|
73
|
+
endpoint=endpoint,
|
|
74
|
+
tunnel_endpoint=tunnel_endpoint,
|
|
75
|
+
)
|
|
56
76
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
77
|
+
|
|
78
|
+
@pytest.fixture(scope="session")
|
|
79
|
+
def odps_with_schema(test_config):
|
|
80
|
+
try:
|
|
81
|
+
return _get_odps_env(test_config, "odps_with_schema")
|
|
82
|
+
except NoSectionError:
|
|
83
|
+
pytest.skip("Need to specify odps_with_schema section in test.conf")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@pytest.fixture(scope="session", autouse=True)
|
|
87
|
+
def odps_envs(test_config):
|
|
88
|
+
entry = _get_odps_env(test_config, "odps")
|
|
89
|
+
|
|
90
|
+
os.environ["ODPS_BEARER_TOKEN"] = entry.account.token
|
|
91
|
+
os.environ["ODPS_PROJECT_NAME"] = entry.project
|
|
92
|
+
os.environ["ODPS_ENDPOINT"] = entry.endpoint
|
|
93
|
+
if entry.tunnel_endpoint:
|
|
94
|
+
os.environ["ODPS_TUNNEL_ENDPOINT"] = entry.tunnel_endpoint
|
|
62
95
|
|
|
63
96
|
try:
|
|
64
97
|
yield
|
|
@@ -77,16 +110,23 @@ def odps_envs(test_config):
|
|
|
77
110
|
pass
|
|
78
111
|
|
|
79
112
|
|
|
80
|
-
@pytest.fixture
|
|
113
|
+
@pytest.fixture(scope="session")
|
|
81
114
|
def oss_config():
|
|
82
115
|
config = ConfigParser()
|
|
83
116
|
config.read(_test_conf_file_name)
|
|
84
117
|
|
|
118
|
+
old_role_arn = options.service_role_arn
|
|
119
|
+
old_cache_url = options.object_cache_url
|
|
120
|
+
|
|
85
121
|
try:
|
|
86
122
|
oss_access_id = config.get("oss", "access_id")
|
|
87
123
|
oss_secret_access_key = config.get("oss", "secret_access_key")
|
|
88
124
|
oss_bucket_name = config.get("oss", "bucket_name")
|
|
89
125
|
oss_endpoint = config.get("oss", "endpoint")
|
|
126
|
+
oss_rolearn = config.get("oss", "rolearn")
|
|
127
|
+
|
|
128
|
+
options.service_role_arn = oss_rolearn
|
|
129
|
+
options.object_cache_url = f"oss://{oss_endpoint}/{oss_bucket_name}"
|
|
90
130
|
|
|
91
131
|
config.oss_config = (
|
|
92
132
|
oss_access_id,
|
|
@@ -99,9 +139,13 @@ def oss_config():
|
|
|
99
139
|
|
|
100
140
|
auth = oss2.Auth(oss_access_id, oss_secret_access_key)
|
|
101
141
|
config.oss_bucket = oss2.Bucket(auth, oss_endpoint, oss_bucket_name)
|
|
102
|
-
|
|
142
|
+
config.oss_rolearn = oss_rolearn
|
|
143
|
+
yield config
|
|
103
144
|
except (ConfigParser.NoSectionError, ConfigParser.NoOptionError, ImportError):
|
|
104
145
|
return None
|
|
146
|
+
finally:
|
|
147
|
+
options.service_role_arn = old_role_arn
|
|
148
|
+
options.object_cache_url = old_cache_url
|
|
105
149
|
|
|
106
150
|
|
|
107
151
|
@pytest.fixture(autouse=True)
|
maxframe/core/__init__.py
CHANGED
|
@@ -14,27 +14,18 @@
|
|
|
14
14
|
|
|
15
15
|
# noinspection PyUnresolvedReferences
|
|
16
16
|
from ..typing_ import ChunkType, EntityType, OperatorType, TileableType
|
|
17
|
-
from .base import ExecutionError
|
|
17
|
+
from .base import Base, ExecutionError
|
|
18
18
|
from .entity import (
|
|
19
|
-
CHUNK_TYPE,
|
|
20
19
|
ENTITY_TYPE,
|
|
21
|
-
FUSE_CHUNK_TYPE,
|
|
22
|
-
OBJECT_CHUNK_TYPE,
|
|
23
20
|
OBJECT_TYPE,
|
|
24
21
|
TILEABLE_TYPE,
|
|
25
|
-
Chunk,
|
|
26
|
-
ChunkData,
|
|
27
22
|
Entity,
|
|
28
23
|
EntityData,
|
|
29
24
|
ExecutableTuple,
|
|
30
|
-
FuseChunk,
|
|
31
|
-
FuseChunkData,
|
|
32
25
|
HasShapeTileable,
|
|
33
26
|
HasShapeTileableData,
|
|
34
27
|
NotSupportTile,
|
|
35
28
|
Object,
|
|
36
|
-
ObjectChunk,
|
|
37
|
-
ObjectChunkData,
|
|
38
29
|
ObjectData,
|
|
39
30
|
OutputType,
|
|
40
31
|
Tileable,
|
|
@@ -43,23 +34,18 @@ from .entity import (
|
|
|
43
34
|
get_fetch_class,
|
|
44
35
|
get_output_types,
|
|
45
36
|
get_tileable_types,
|
|
46
|
-
register,
|
|
47
37
|
register_fetch_class,
|
|
48
38
|
register_output_types,
|
|
49
|
-
unregister,
|
|
50
39
|
)
|
|
51
40
|
|
|
52
41
|
# noinspection PyUnresolvedReferences
|
|
53
42
|
from .graph import (
|
|
54
43
|
DAG,
|
|
55
|
-
ChunkGraph,
|
|
56
|
-
ChunkGraphBuilder,
|
|
57
44
|
DirectedGraph,
|
|
58
45
|
GraphContainsCycleError,
|
|
59
46
|
GraphSerializer,
|
|
60
47
|
TileableGraph,
|
|
61
48
|
TileableGraphBuilder,
|
|
62
|
-
TileContext,
|
|
63
|
-
TileStatus,
|
|
64
49
|
)
|
|
65
50
|
from .mode import enter_mode, is_build_mode, is_eager_mode, is_kernel_mode
|
|
51
|
+
from .operator import build_fetch
|
maxframe/core/entity/__init__.py
CHANGED
|
@@ -12,18 +12,9 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from .chunks import CHUNK_TYPE, Chunk, ChunkData
|
|
16
15
|
from .core import ENTITY_TYPE, Entity, EntityData
|
|
17
16
|
from .executable import ExecutableTuple, _ExecuteAndFetchMixin
|
|
18
|
-
from .
|
|
19
|
-
from .objects import (
|
|
20
|
-
OBJECT_CHUNK_TYPE,
|
|
21
|
-
OBJECT_TYPE,
|
|
22
|
-
Object,
|
|
23
|
-
ObjectChunk,
|
|
24
|
-
ObjectChunkData,
|
|
25
|
-
ObjectData,
|
|
26
|
-
)
|
|
17
|
+
from .objects import OBJECT_TYPE, Object, ObjectData
|
|
27
18
|
from .output_types import (
|
|
28
19
|
OutputType,
|
|
29
20
|
get_fetch_class,
|
|
@@ -39,6 +30,4 @@ from .tileables import (
|
|
|
39
30
|
NotSupportTile,
|
|
40
31
|
Tileable,
|
|
41
32
|
TileableData,
|
|
42
|
-
register,
|
|
43
|
-
unregister,
|
|
44
33
|
)
|
maxframe/core/entity/objects.py
CHANGED
|
@@ -12,63 +12,57 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from typing import Any, Dict
|
|
15
|
+
from typing import Any, Dict, Type
|
|
16
16
|
|
|
17
|
-
from ...serialization
|
|
18
|
-
from ...
|
|
19
|
-
from .chunks import Chunk, ChunkData
|
|
17
|
+
from ...serialization import load_type
|
|
18
|
+
from ...serialization.serializables import StringField
|
|
20
19
|
from .core import Entity
|
|
21
20
|
from .executable import _ToObjectMixin
|
|
22
21
|
from .tileables import TileableData
|
|
23
22
|
|
|
24
23
|
|
|
25
|
-
class
|
|
26
|
-
# chunk whose data could be any serializable
|
|
24
|
+
class ObjectData(TileableData, _ToObjectMixin):
|
|
27
25
|
__slots__ = ()
|
|
28
26
|
type_name = "Object"
|
|
27
|
+
# workaround for removed field since v0.1.0b5
|
|
28
|
+
# todo remove this when all versions below v1.0.0rc1 is eliminated
|
|
29
|
+
_legacy_deprecated_non_primitives = ["_chunks"]
|
|
30
|
+
_legacy_new_non_primitives = ["object_class"]
|
|
29
31
|
|
|
30
|
-
|
|
31
|
-
super().__init__(_op=op, _index=index, **kw)
|
|
32
|
-
|
|
33
|
-
@property
|
|
34
|
-
def params(self) -> Dict[str, Any]:
|
|
35
|
-
# params return the properties which useful to rebuild a new chunk
|
|
36
|
-
return {
|
|
37
|
-
"index": self.index,
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
@params.setter
|
|
41
|
-
def params(self, new_params: Dict[str, Any]):
|
|
42
|
-
params = new_params.copy()
|
|
43
|
-
params.pop("index", None) # index not needed to update
|
|
44
|
-
if params: # pragma: no cover
|
|
45
|
-
raise TypeError(f"Unknown params: {list(params)}")
|
|
32
|
+
object_class = StringField("object_class", default=None)
|
|
46
33
|
|
|
47
34
|
@classmethod
|
|
48
|
-
def
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
on_deserialize=skip_na_call(lambda x: [ObjectChunk(it) for it in x]),
|
|
68
|
-
)
|
|
35
|
+
def get_entity_class(cls) -> Type["Object"]:
|
|
36
|
+
if getattr(cls, "_entity_class", None) is not None:
|
|
37
|
+
return cls._entity_class
|
|
38
|
+
assert cls.__qualname__[-4:] == "Data"
|
|
39
|
+
target_class_name = cls.__module__ + "#" + cls.__qualname__[:-4]
|
|
40
|
+
cls._entity_class = load_type(target_class_name, Object)
|
|
41
|
+
return cls._entity_class
|
|
42
|
+
|
|
43
|
+
def __new__(cls, op=None, nsplits=None, **kw):
|
|
44
|
+
if cls is ObjectData:
|
|
45
|
+
obj_cls = kw.get("object_class")
|
|
46
|
+
if isinstance(obj_cls, str):
|
|
47
|
+
obj_cls = load_type(obj_cls, (Object, ObjectData))
|
|
48
|
+
if isinstance(obj_cls, type) and issubclass(obj_cls, Object):
|
|
49
|
+
obj_cls = obj_cls.get_data_class()
|
|
50
|
+
|
|
51
|
+
if obj_cls is not None and cls is not obj_cls:
|
|
52
|
+
return obj_cls(op=op, nsplits=nsplits, **kw)
|
|
53
|
+
return super().__new__(cls)
|
|
69
54
|
|
|
70
55
|
def __init__(self, op=None, nsplits=None, **kw):
|
|
56
|
+
obj_cls = kw.pop("object_class", None)
|
|
57
|
+
if isinstance(obj_cls, type):
|
|
58
|
+
if isinstance(obj_cls, type) and issubclass(obj_cls, Object):
|
|
59
|
+
obj_cls = obj_cls.get_data_class()
|
|
60
|
+
kw["object_class"] = obj_cls.__module__ + "#" + obj_cls.__qualname__
|
|
61
|
+
|
|
71
62
|
super().__init__(_op=op, _nsplits=nsplits, **kw)
|
|
63
|
+
if self.object_class is None and type(self) is not ObjectData:
|
|
64
|
+
cls = type(self)
|
|
65
|
+
self.object_class = cls.__module__ + "#" + cls.__qualname__
|
|
72
66
|
|
|
73
67
|
def __repr__(self):
|
|
74
68
|
return f"Object <op={type(self.op).__name__}, key={self.key}>"
|
|
@@ -76,7 +70,7 @@ class ObjectData(TileableData, _ToObjectMixin):
|
|
|
76
70
|
@property
|
|
77
71
|
def params(self):
|
|
78
72
|
# params return the properties which useful to rebuild a new tileable object
|
|
79
|
-
return dict()
|
|
73
|
+
return dict(object_class=self.object_class)
|
|
80
74
|
|
|
81
75
|
@params.setter
|
|
82
76
|
def params(self, new_params: Dict[str, Any]):
|
|
@@ -95,6 +89,13 @@ class Object(Entity, _ToObjectMixin):
|
|
|
95
89
|
_allow_data_type_ = (ObjectData,)
|
|
96
90
|
type_name = "Object"
|
|
97
91
|
|
|
92
|
+
@classmethod
|
|
93
|
+
def get_data_class(cls) -> Type[ObjectData]:
|
|
94
|
+
if getattr(cls, "_data_class", None) is not None:
|
|
95
|
+
return cls._data_class
|
|
96
|
+
target_class_name = cls.__module__ + "#" + cls.__qualname__ + "Data"
|
|
97
|
+
cls._data_class = load_type(target_class_name, ObjectData)
|
|
98
|
+
return cls._data_class
|
|
99
|
+
|
|
98
100
|
|
|
99
101
|
OBJECT_TYPE = (Object, ObjectData)
|
|
100
|
-
OBJECT_CHUNK_TYPE = (ObjectChunk, ObjectChunkData)
|
maxframe/core/entity/output_types.py
CHANGED
|
@@ -15,7 +15,6 @@
|
|
|
15
15
|
import functools
|
|
16
16
|
from enum import Enum
|
|
17
17
|
|
|
18
|
-
from .fuse import FUSE_CHUNK_TYPE
|
|
19
18
|
from .objects import OBJECT_TYPE
|
|
20
19
|
|
|
21
20
|
|
|
@@ -77,8 +76,6 @@ def get_output_types(*objs, unknown_as=None):
|
|
|
77
76
|
for obj in objs:
|
|
78
77
|
if obj is None:
|
|
79
78
|
continue
|
|
80
|
-
elif isinstance(obj, FUSE_CHUNK_TYPE):
|
|
81
|
-
obj = obj.chunk
|
|
82
79
|
|
|
83
80
|
try:
|
|
84
81
|
output_types.append(_get_output_type_by_cls(type(obj)))
|