deltacat-1.1.3-py3-none-any.whl → deltacat-1.1.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/constants.py +0 -4
- deltacat/aws/s3u.py +26 -5
- deltacat/compute/compactor/utils/round_completion_file.py +3 -0
- deltacat/compute/compactor_v2/compaction_session.py +2 -0
- deltacat/compute/compactor_v2/constants.py +0 -3
- deltacat/compute/compactor_v2/utils/merge.py +0 -3
- deltacat/logs.py +126 -69
- deltacat/tests/aws/test_s3u.py +12 -0
- deltacat/tests/test_logs.py +127 -0
- deltacat/tests/utils/test_placement.py +25 -0
- deltacat/utils/placement.py +14 -7
- {deltacat-1.1.3.dist-info → deltacat-1.1.4.dist-info}/METADATA +1 -1
- {deltacat-1.1.3.dist-info → deltacat-1.1.4.dist-info}/RECORD +17 -14
- {deltacat-1.1.3.dist-info → deltacat-1.1.4.dist-info}/LICENSE +0 -0
- {deltacat-1.1.3.dist-info → deltacat-1.1.4.dist-info}/WHEEL +0 -0
- {deltacat-1.1.3.dist-info → deltacat-1.1.4.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
deltacat/aws/constants.py
CHANGED
@@ -6,7 +6,3 @@ DAFT_MAX_S3_CONNECTIONS_PER_FILE = env_integer("DAFT_MAX_S3_CONNECTIONS_PER_FILE
|
|
6
6
|
BOTO_MAX_RETRIES = env_integer("BOTO_MAX_RETRIES", 5)
|
7
7
|
TIMEOUT_ERROR_CODES: List[str] = ["ReadTimeoutError", "ConnectTimeoutError"]
|
8
8
|
AWS_REGION = env_string("AWS_REGION", "us-east-1")
|
9
|
-
|
10
|
-
# Metric Names
|
11
|
-
DOWNLOAD_MANIFEST_ENTRY_METRIC_PREFIX = "download_manifest_entry"
|
12
|
-
UPLOAD_SLICED_TABLE_METRIC_PREFIX = "upload_sliced_table"
|
deltacat/aws/s3u.py
CHANGED
@@ -27,8 +27,6 @@ import deltacat.aws.clients as aws_utils
|
|
27
27
|
from deltacat import logs
|
28
28
|
from deltacat.aws.constants import (
|
29
29
|
TIMEOUT_ERROR_CODES,
|
30
|
-
DOWNLOAD_MANIFEST_ENTRY_METRIC_PREFIX,
|
31
|
-
UPLOAD_SLICED_TABLE_METRIC_PREFIX,
|
32
30
|
)
|
33
31
|
from deltacat.exceptions import NonRetryableError, RetryableError
|
34
32
|
from deltacat.storage import (
|
@@ -54,7 +52,6 @@ from deltacat.types.tables import (
|
|
54
52
|
)
|
55
53
|
from deltacat.types.partial_download import PartialFileDownloadParams
|
56
54
|
from deltacat.utils.common import ReadKwargsProvider
|
57
|
-
from deltacat.utils.metrics import metrics
|
58
55
|
|
59
56
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
60
57
|
|
@@ -121,6 +118,32 @@ class UuidBlockWritePathProvider(BlockWritePathProvider):
|
|
121
118
|
self.block_refs.append(block)
|
122
119
|
return write_path
|
123
120
|
|
121
|
+
def __call__(
|
122
|
+
self,
|
123
|
+
base_path: str,
|
124
|
+
*,
|
125
|
+
filesystem: Optional[pa.filesystem.FileSystem] = None,
|
126
|
+
dataset_uuid: Optional[str] = None,
|
127
|
+
block: Optional[ObjectRef[Block]] = None,
|
128
|
+
block_index: Optional[int] = None,
|
129
|
+
file_format: Optional[str] = None,
|
130
|
+
) -> str:
|
131
|
+
"""
|
132
|
+
TODO: BlockWritePathProvider is deprecated as of Ray version 2.20.0. Please use FilenameProvider.
|
133
|
+
See: https://docs.ray.io/en/master/data/api/doc/ray.data.datasource.FilenameProvider.html
|
134
|
+
Also See: https://github.com/ray-project/deltacat/issues/299
|
135
|
+
|
136
|
+
Hence, this class only works with Ray version 2.20.0 or lower when used in Ray Dataset.
|
137
|
+
"""
|
138
|
+
return self._get_write_path_for_block(
|
139
|
+
base_path,
|
140
|
+
filesystem=filesystem,
|
141
|
+
dataset_uuid=dataset_uuid,
|
142
|
+
block=block,
|
143
|
+
block_index=block_index,
|
144
|
+
file_format=file_format,
|
145
|
+
)
|
146
|
+
|
124
147
|
|
125
148
|
class S3Url:
|
126
149
|
def __init__(self, url: str):
|
@@ -243,7 +266,6 @@ def read_file(
|
|
243
266
|
raise e
|
244
267
|
|
245
268
|
|
246
|
-
@metrics(prefix=UPLOAD_SLICED_TABLE_METRIC_PREFIX)
|
247
269
|
def upload_sliced_table(
|
248
270
|
table: Union[LocalTable, DistributedDataset],
|
249
271
|
s3_url_prefix: str,
|
@@ -352,7 +374,6 @@ def upload_table(
|
|
352
374
|
return manifest_entries
|
353
375
|
|
354
376
|
|
355
|
-
@metrics(prefix=DOWNLOAD_MANIFEST_ENTRY_METRIC_PREFIX)
|
356
377
|
def download_manifest_entry(
|
357
378
|
manifest_entry: ManifestEntry,
|
358
379
|
token_holder: Optional[Dict[str, Any]] = None,
|
deltacat/compute/compactor/utils/round_completion_file.py
CHANGED
@@ -6,6 +6,7 @@ from deltacat.compute.compactor import RoundCompletionInfo
|
|
6
6
|
from deltacat.storage import PartitionLocator
|
7
7
|
from deltacat.aws import s3u as s3_utils
|
8
8
|
from typing import Optional
|
9
|
+
from deltacat.utils.metrics import metrics
|
9
10
|
|
10
11
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
11
12
|
|
@@ -18,6 +19,7 @@ def get_round_completion_file_s3_url(
|
|
18
19
|
return f"{base_url}.json"
|
19
20
|
|
20
21
|
|
22
|
+
@metrics
|
21
23
|
def read_round_completion_file(
|
22
24
|
bucket: str,
|
23
25
|
source_partition_locator: PartitionLocator,
|
@@ -38,6 +40,7 @@ def read_round_completion_file(
|
|
38
40
|
return round_completion_info
|
39
41
|
|
40
42
|
|
43
|
+
@metrics
|
41
44
|
def write_round_completion_file(
|
42
45
|
bucket: Optional[str],
|
43
46
|
source_partition_locator: Optional[PartitionLocator],
|
deltacat/compute/compactor_v2/compaction_session.py
CHANGED
@@ -50,6 +50,7 @@ from deltacat.compute.compactor_v2.steps import merge as mg
|
|
50
50
|
from deltacat.compute.compactor_v2.steps import hash_bucket as hb
|
51
51
|
from deltacat.compute.compactor_v2.utils import io
|
52
52
|
from deltacat.compute.compactor.utils import round_completion_file as rcf
|
53
|
+
from deltacat.utils.metrics import metrics
|
53
54
|
|
54
55
|
from typing import List, Optional, Tuple
|
55
56
|
from collections import defaultdict
|
@@ -73,6 +74,7 @@ if importlib.util.find_spec("memray"):
|
|
73
74
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
74
75
|
|
75
76
|
|
77
|
+
@metrics
|
76
78
|
def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]:
|
77
79
|
|
78
80
|
assert (
|
deltacat/compute/compactor_v2/utils/merge.py
CHANGED
@@ -31,14 +31,11 @@ from deltacat.compute.compactor_v2.deletes.delete_strategy import (
|
|
31
31
|
from deltacat.compute.compactor_v2.deletes.delete_file_envelope import (
|
32
32
|
DeleteFileEnvelope,
|
33
33
|
)
|
34
|
-
from deltacat.utils.metrics import metrics
|
35
|
-
from deltacat.compute.compactor_v2.constants import MATERIALIZE_METRIC_PREFIX
|
36
34
|
|
37
35
|
|
38
36
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
39
37
|
|
40
38
|
|
41
|
-
@metrics(prefix=MATERIALIZE_METRIC_PREFIX)
|
42
39
|
def materialize(
|
43
40
|
input: MergeInput,
|
44
41
|
task_index: int,
|
deltacat/logs.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
import logging
|
2
2
|
import os
|
3
|
+
import json
|
3
4
|
import pathlib
|
4
5
|
from logging import FileHandler, Handler, Logger, LoggerAdapter, handlers
|
5
6
|
from typing import Any, Dict, Optional, Union
|
@@ -19,13 +20,106 @@ from deltacat.constants import (
|
|
19
20
|
)
|
20
21
|
|
21
22
|
DEFAULT_LOG_LEVEL = "INFO"
|
22
|
-
DEFAULT_LOG_FORMAT =
|
23
|
-
"
|
24
|
-
|
23
|
+
DEFAULT_LOG_FORMAT = {
|
24
|
+
"level": "levelname",
|
25
|
+
"message": "message",
|
26
|
+
"loggerName": "name",
|
27
|
+
"processName": "processName",
|
28
|
+
"processID": "process",
|
29
|
+
"threadName": "threadName",
|
30
|
+
"timestamp": "asctime",
|
31
|
+
"filename": "filename",
|
32
|
+
"lineno": "lineno",
|
33
|
+
}
|
25
34
|
DEFAULT_MAX_BYTES_PER_LOG = 2 ^ 20 * 256 # 256 MiB
|
26
35
|
DEFAULT_BACKUP_COUNT = 0
|
27
36
|
|
28
37
|
|
38
|
+
class JsonFormatter(logging.Formatter):
|
39
|
+
"""
|
40
|
+
Formatter that outputs JSON strings after parsing the LogRecord.
|
41
|
+
|
42
|
+
@param dict fmt_dict: Key: logging format attribute pairs. Defaults to {"message": "message"}.
|
43
|
+
@param str time_format: time.strftime() format string. Default: "%Y-%m-%dT%H:%M:%S"
|
44
|
+
@param str msec_format: Microsecond formatting. Appended at the end. Default: "%s.%03dZ"
|
45
|
+
"""
|
46
|
+
|
47
|
+
def __init__(
|
48
|
+
self,
|
49
|
+
fmt_dict: dict = None,
|
50
|
+
time_format: str = "%Y-%m-%dT%H:%M:%S",
|
51
|
+
msec_format: str = "%s.%03dZ",
|
52
|
+
):
|
53
|
+
self.fmt_dict = fmt_dict if fmt_dict is not None else {"message": "message"}
|
54
|
+
self.default_time_format = time_format
|
55
|
+
self.default_msec_format = msec_format
|
56
|
+
self.datefmt = None
|
57
|
+
if ray.is_initialized():
|
58
|
+
self.ray_runtime_ctx: RuntimeContext = ray.get_runtime_context()
|
59
|
+
self.context = {}
|
60
|
+
self.context["worker_id"] = self.ray_runtime_ctx.get_worker_id()
|
61
|
+
self.context["node_id"] = self.ray_runtime_ctx.get_node_id()
|
62
|
+
self.context["job_id"] = self.ray_runtime_ctx.get_job_id()
|
63
|
+
else:
|
64
|
+
self.ray_runtime_ctx = None
|
65
|
+
self.context = {}
|
66
|
+
|
67
|
+
def usesTime(self) -> bool:
|
68
|
+
"""
|
69
|
+
Overwritten to look for the attribute in the format dict values instead of the fmt string.
|
70
|
+
"""
|
71
|
+
return "asctime" in self.fmt_dict.values()
|
72
|
+
|
73
|
+
def formatMessage(self, record) -> dict:
|
74
|
+
"""
|
75
|
+
Overwritten to return a dictionary of the relevant LogRecord attributes instead of a string.
|
76
|
+
KeyError is raised if an unknown attribute is provided in the fmt_dict.
|
77
|
+
"""
|
78
|
+
return {
|
79
|
+
fmt_key: record.__dict__[fmt_val]
|
80
|
+
for fmt_key, fmt_val in self.fmt_dict.items()
|
81
|
+
}
|
82
|
+
|
83
|
+
def format(self, record) -> str:
|
84
|
+
"""
|
85
|
+
Mostly the same as the parent's class method, the difference being that a dict is manipulated and dumped as JSON
|
86
|
+
instead of a string.
|
87
|
+
"""
|
88
|
+
record.message = record.getMessage()
|
89
|
+
|
90
|
+
if self.usesTime():
|
91
|
+
record.asctime = self.formatTime(record, self.datefmt)
|
92
|
+
|
93
|
+
message_dict = self.formatMessage(record)
|
94
|
+
|
95
|
+
if record.exc_info:
|
96
|
+
# Cache the traceback text to avoid converting it multiple times
|
97
|
+
# (it's constant anyway)
|
98
|
+
if not record.exc_text:
|
99
|
+
record.exc_text = self.formatException(record.exc_info)
|
100
|
+
|
101
|
+
if record.exc_text:
|
102
|
+
message_dict["exc_info"] = record.exc_text
|
103
|
+
|
104
|
+
if record.stack_info:
|
105
|
+
message_dict["stack_info"] = self.formatStack(record.stack_info)
|
106
|
+
|
107
|
+
if self.ray_runtime_ctx:
|
108
|
+
# only workers will have task ID
|
109
|
+
if (
|
110
|
+
self.ray_runtime_ctx.worker
|
111
|
+
and self.ray_runtime_ctx.worker.mode == ray._private.worker.WORKER_MODE
|
112
|
+
):
|
113
|
+
self.context["task_id"] = self.ray_runtime_ctx.get_task_id()
|
114
|
+
self.context[
|
115
|
+
"assigned_resources"
|
116
|
+
] = self.ray_runtime_ctx.get_assigned_resources()
|
117
|
+
|
118
|
+
message_dict["ray_runtime_context"] = self.context
|
119
|
+
|
120
|
+
return json.dumps(message_dict, default=str)
|
121
|
+
|
122
|
+
|
29
123
|
class DeltaCATLoggerAdapter(logging.LoggerAdapter):
|
30
124
|
"""
|
31
125
|
Logger Adapter class with additional functionality
|
@@ -51,54 +145,6 @@ class DeltaCATLoggerAdapter(logging.LoggerAdapter):
|
|
51
145
|
self.error(msg, *args, **kwargs)
|
52
146
|
|
53
147
|
|
54
|
-
class RayRuntimeContextLoggerAdapter(DeltaCATLoggerAdapter):
|
55
|
-
"""
|
56
|
-
Logger Adapter for injecting Ray Runtime Context into logging messages.
|
57
|
-
"""
|
58
|
-
|
59
|
-
def __init__(self, logger: Logger, runtime_context: RuntimeContext):
|
60
|
-
super().__init__(logger, {})
|
61
|
-
self.runtime_context = runtime_context
|
62
|
-
|
63
|
-
def process(self, msg, kwargs):
|
64
|
-
"""
|
65
|
-
Injects Ray Runtime Context details into each log message.
|
66
|
-
|
67
|
-
This may include information such as the raylet node ID, task/actor ID, job ID,
|
68
|
-
placement group ID of the worker, and assigned resources to the task/actor.
|
69
|
-
|
70
|
-
Args:
|
71
|
-
msg: The original log message
|
72
|
-
kwargs: Keyword arguments for the log message
|
73
|
-
|
74
|
-
Returns: A log message with Ray Runtime Context details
|
75
|
-
|
76
|
-
"""
|
77
|
-
runtime_context_dict = self.runtime_context.get()
|
78
|
-
runtime_context_dict[
|
79
|
-
"worker_id"
|
80
|
-
] = self.runtime_context.worker.core_worker.get_worker_id()
|
81
|
-
if self.runtime_context.get_task_id() or self.runtime_context.get_actor_id():
|
82
|
-
runtime_context_dict[
|
83
|
-
"pg_id"
|
84
|
-
] = self.runtime_context.get_placement_group_id()
|
85
|
-
runtime_context_dict[
|
86
|
-
"assigned_resources"
|
87
|
-
] = self.runtime_context.get_assigned_resources()
|
88
|
-
|
89
|
-
return "(ray_runtime_context=%s) -- %s" % (runtime_context_dict, msg), kwargs
|
90
|
-
|
91
|
-
def __reduce__(self):
|
92
|
-
"""
|
93
|
-
Used to unpickle the class during Ray object store transfer.
|
94
|
-
"""
|
95
|
-
|
96
|
-
def deserializer(*args):
|
97
|
-
return RayRuntimeContextLoggerAdapter(args[0], ray.get_runtime_context())
|
98
|
-
|
99
|
-
return deserializer, (self.logger,)
|
100
|
-
|
101
|
-
|
102
148
|
def _add_logger_handler(logger: Logger, handler: Handler) -> Logger:
|
103
149
|
|
104
150
|
logger.setLevel(logging.getLevelName("DEBUG"))
|
@@ -109,10 +155,10 @@ def _add_logger_handler(logger: Logger, handler: Handler) -> Logger:
|
|
109
155
|
def _create_rotating_file_handler(
|
110
156
|
log_directory: str,
|
111
157
|
log_base_file_name: str,
|
112
|
-
logging_level: str = DEFAULT_LOG_LEVEL,
|
158
|
+
logging_level: Union[str, int] = DEFAULT_LOG_LEVEL,
|
113
159
|
max_bytes_per_log_file: int = DEFAULT_MAX_BYTES_PER_LOG,
|
114
160
|
backup_count: int = DEFAULT_BACKUP_COUNT,
|
115
|
-
logging_format: str = DEFAULT_LOG_FORMAT,
|
161
|
+
logging_format: Union[str, dict] = DEFAULT_LOG_FORMAT,
|
116
162
|
) -> FileHandler:
|
117
163
|
|
118
164
|
if type(logging_level) is str:
|
@@ -126,7 +172,12 @@ def _create_rotating_file_handler(
|
|
126
172
|
maxBytes=max_bytes_per_log_file,
|
127
173
|
backupCount=backup_count,
|
128
174
|
)
|
129
|
-
|
175
|
+
|
176
|
+
if type(logging_format) is str:
|
177
|
+
handler.setFormatter(logging.Formatter(logging_format))
|
178
|
+
else:
|
179
|
+
handler.setFormatter(JsonFormatter(logging_format))
|
180
|
+
|
130
181
|
handler.setLevel(logging_level)
|
131
182
|
return handler
|
132
183
|
|
@@ -135,7 +186,8 @@ def _file_handler_exists(logger: Logger, log_dir: str, log_base_file_name: str)
|
|
135
186
|
|
136
187
|
handler_exists = False
|
137
188
|
base_file_path = os.path.join(log_dir, log_base_file_name)
|
138
|
-
|
189
|
+
|
190
|
+
if logger.handlers:
|
139
191
|
norm_base_file_path = os.path.normpath(base_file_path)
|
140
192
|
handler_exists = any(
|
141
193
|
[
|
@@ -149,49 +201,54 @@ def _file_handler_exists(logger: Logger, log_dir: str, log_base_file_name: str)
|
|
149
201
|
|
150
202
|
def _configure_logger(
|
151
203
|
logger: Logger,
|
152
|
-
log_level: str,
|
204
|
+
log_level: int,
|
153
205
|
log_dir: str,
|
154
206
|
log_base_file_name: str,
|
155
207
|
debug_log_base_file_name: str,
|
156
208
|
) -> Union[Logger, LoggerAdapter]:
|
209
|
+
# This maintains log level of rotating file handlers
|
157
210
|
primary_log_level = log_level
|
158
211
|
logger.propagate = False
|
159
|
-
if log_level.upper() == "DEBUG":
|
212
|
+
if log_level <= logging.getLevelName("DEBUG"):
|
160
213
|
if not _file_handler_exists(logger, log_dir, debug_log_base_file_name):
|
161
214
|
handler = _create_rotating_file_handler(
|
162
215
|
log_dir, debug_log_base_file_name, "DEBUG"
|
163
216
|
)
|
164
217
|
_add_logger_handler(logger, handler)
|
165
|
-
primary_log_level = "INFO"
|
218
|
+
primary_log_level = logging.getLevelName("INFO")
|
166
219
|
if not _file_handler_exists(logger, log_dir, log_base_file_name):
|
167
220
|
handler = _create_rotating_file_handler(
|
168
221
|
log_dir, log_base_file_name, primary_log_level
|
169
222
|
)
|
170
223
|
_add_logger_handler(logger, handler)
|
171
|
-
if ray.is_initialized():
|
172
|
-
ray_runtime_ctx = ray.get_runtime_context()
|
173
|
-
if ray_runtime_ctx.worker.connected:
|
174
|
-
logger = RayRuntimeContextLoggerAdapter(logger, ray_runtime_ctx)
|
175
|
-
else:
|
176
|
-
logger = DeltaCATLoggerAdapter(logger)
|
177
224
|
|
178
|
-
return logger
|
225
|
+
return DeltaCATLoggerAdapter(logger)
|
226
|
+
|
179
227
|
|
228
|
+
def configure_deltacat_logger(
|
229
|
+
logger: Logger, level: int = None
|
230
|
+
) -> Union[Logger, LoggerAdapter]:
|
231
|
+
if level is None:
|
232
|
+
level = logging.getLevelName(DELTACAT_SYS_LOG_LEVEL)
|
180
233
|
|
181
|
-
def configure_deltacat_logger(logger: Logger) -> Union[Logger, LoggerAdapter]:
|
182
234
|
return _configure_logger(
|
183
235
|
logger,
|
184
|
-
|
236
|
+
level,
|
185
237
|
DELTACAT_SYS_LOG_DIR,
|
186
238
|
DELTACAT_SYS_INFO_LOG_BASE_FILE_NAME,
|
187
239
|
DELTACAT_SYS_DEBUG_LOG_BASE_FILE_NAME,
|
188
240
|
)
|
189
241
|
|
190
242
|
|
191
|
-
def configure_application_logger(logger: Logger) -> Union[Logger, LoggerAdapter]:
|
243
|
+
def configure_application_logger(
|
244
|
+
logger: Logger, level: int = None
|
245
|
+
) -> Union[Logger, LoggerAdapter]:
|
246
|
+
if level is None:
|
247
|
+
level = logging.getLevelName(DELTACAT_APP_LOG_LEVEL)
|
248
|
+
|
192
249
|
return _configure_logger(
|
193
250
|
logger,
|
194
|
-
|
251
|
+
level,
|
195
252
|
DELTACAT_APP_LOG_DIR,
|
196
253
|
DELTACAT_APP_INFO_LOG_BASE_FILE_NAME,
|
197
254
|
DELTACAT_APP_DEBUG_LOG_BASE_FILE_NAME,
|
deltacat/tests/aws/test_s3u.py
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
import unittest
|
2
|
+
from deltacat.aws.s3u import UuidBlockWritePathProvider, CapturedBlockWritePaths
|
3
|
+
|
4
|
+
|
5
|
+
class TestUuidBlockWritePathProvider(unittest.TestCase):
|
6
|
+
def test_uuid_block_write_provider_sanity(self):
|
7
|
+
capture_object = CapturedBlockWritePaths()
|
8
|
+
provider = UuidBlockWritePathProvider(capture_object=capture_object)
|
9
|
+
|
10
|
+
result = provider("base_path")
|
11
|
+
|
12
|
+
self.assertRegex(result, r"^base_path/[\w-]{36}$")
|
deltacat/tests/test_logs.py
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
import unittest
|
2
|
+
import json
|
3
|
+
import ray
|
4
|
+
from logging import LogRecord
|
5
|
+
from deltacat.logs import JsonFormatter
|
6
|
+
|
7
|
+
|
8
|
+
class TestJsonFormatter(unittest.TestCase):
|
9
|
+
def test_usesTime_sanity(self):
|
10
|
+
|
11
|
+
formatter = JsonFormatter()
|
12
|
+
|
13
|
+
self.assertFalse(formatter.usesTime())
|
14
|
+
|
15
|
+
def test_usesTime_success_case(self):
|
16
|
+
|
17
|
+
formatter = JsonFormatter(fmt_dict={"asctime": "asctime"})
|
18
|
+
|
19
|
+
self.assertTrue(formatter.usesTime())
|
20
|
+
|
21
|
+
def test_formatMessage_sanity(self):
|
22
|
+
|
23
|
+
formatter = JsonFormatter({"message": "msg"})
|
24
|
+
|
25
|
+
record = LogRecord(
|
26
|
+
level="INFO",
|
27
|
+
name="test",
|
28
|
+
pathname="test",
|
29
|
+
lineno=0,
|
30
|
+
message="test_message",
|
31
|
+
msg="test_message",
|
32
|
+
args=None,
|
33
|
+
exc_info=None,
|
34
|
+
)
|
35
|
+
|
36
|
+
result = formatter.formatMessage(record)
|
37
|
+
|
38
|
+
self.assertEqual({"message": "test_message"}, result)
|
39
|
+
|
40
|
+
def test_format_sanity(self):
|
41
|
+
formatter = JsonFormatter({"message": "msg"})
|
42
|
+
|
43
|
+
record = LogRecord(
|
44
|
+
level="INFO",
|
45
|
+
name="test",
|
46
|
+
pathname="test",
|
47
|
+
lineno=0,
|
48
|
+
message="test_message",
|
49
|
+
msg="test_message",
|
50
|
+
args=None,
|
51
|
+
exc_info=None,
|
52
|
+
)
|
53
|
+
|
54
|
+
result = formatter.format(record)
|
55
|
+
|
56
|
+
self.assertEqual({"message": "test_message"}, json.loads(result))
|
57
|
+
self.assertFalse(ray.is_initialized())
|
58
|
+
self.assertNotIn("ray_runtime_context", json.loads(result))
|
59
|
+
|
60
|
+
def test_format_when_ray_initialized(self):
|
61
|
+
ray.init(local_mode=True, ignore_reinit_error=True)
|
62
|
+
|
63
|
+
formatter = JsonFormatter({"message": "msg"})
|
64
|
+
|
65
|
+
record = LogRecord(
|
66
|
+
level="INFO",
|
67
|
+
name="test",
|
68
|
+
pathname="test",
|
69
|
+
lineno=0,
|
70
|
+
message="test_message",
|
71
|
+
msg="test_message",
|
72
|
+
args=None,
|
73
|
+
exc_info=None,
|
74
|
+
)
|
75
|
+
|
76
|
+
result = formatter.format(record)
|
77
|
+
result = json.loads(result)
|
78
|
+
|
79
|
+
self.assertEqual("test_message", result["message"])
|
80
|
+
self.assertTrue(ray.is_initialized())
|
81
|
+
self.assertIn("ray_runtime_context", result)
|
82
|
+
self.assertIn("job_id", result["ray_runtime_context"])
|
83
|
+
self.assertIn("node_id", result["ray_runtime_context"])
|
84
|
+
self.assertIn("worker_id", result["ray_runtime_context"])
|
85
|
+
self.assertNotIn(
|
86
|
+
"task_id",
|
87
|
+
result["ray_runtime_context"],
|
88
|
+
"We expect task ID not be present outside a remote task",
|
89
|
+
)
|
90
|
+
ray.shutdown()
|
91
|
+
|
92
|
+
def test_format_when_ray_initialized_in_task(self):
|
93
|
+
# worker mode is only true when local_mode is False
|
94
|
+
ray.init(local_mode=False, ignore_reinit_error=True)
|
95
|
+
|
96
|
+
@ray.remote
|
97
|
+
def ray_remote_task():
|
98
|
+
formatter = JsonFormatter({"message": "msg"})
|
99
|
+
|
100
|
+
record = LogRecord(
|
101
|
+
level="INFO",
|
102
|
+
name="test",
|
103
|
+
pathname="test",
|
104
|
+
lineno=0,
|
105
|
+
message="test_message",
|
106
|
+
msg="test_message",
|
107
|
+
args=None,
|
108
|
+
exc_info=None,
|
109
|
+
)
|
110
|
+
|
111
|
+
result = formatter.format(record)
|
112
|
+
result = json.loads(result)
|
113
|
+
return result
|
114
|
+
|
115
|
+
result = ray.get(ray_remote_task.remote())
|
116
|
+
self.assertEqual("test_message", result["message"])
|
117
|
+
self.assertTrue(ray.is_initialized())
|
118
|
+
self.assertIn("ray_runtime_context", result)
|
119
|
+
self.assertIn("job_id", result["ray_runtime_context"])
|
120
|
+
self.assertIn("node_id", result["ray_runtime_context"])
|
121
|
+
self.assertIn("worker_id", result["ray_runtime_context"])
|
122
|
+
self.assertIn(
|
123
|
+
"task_id",
|
124
|
+
result["ray_runtime_context"],
|
125
|
+
"We expect task ID to be present inside a remote task",
|
126
|
+
)
|
127
|
+
ray.shutdown()
|
deltacat/tests/utils/test_placement.py
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
import unittest
|
2
|
+
import ray
|
3
|
+
from deltacat.utils.placement import (
|
4
|
+
PlacementGroupManager,
|
5
|
+
_get_available_resources_per_node,
|
6
|
+
)
|
7
|
+
|
8
|
+
|
9
|
+
class TestPlacementGroupManager(unittest.TestCase):
|
10
|
+
@classmethod
|
11
|
+
def setUpClass(cls):
|
12
|
+
super().setUpClass()
|
13
|
+
ray.init(local_mode=True, ignore_reinit_error=True)
|
14
|
+
|
15
|
+
def test_placement_group_manager_sanity(self):
|
16
|
+
|
17
|
+
pgm = PlacementGroupManager(1, 1, 1)
|
18
|
+
|
19
|
+
self.assertIsNotNone(pgm)
|
20
|
+
|
21
|
+
def test_ray_state_api_returns_correctly(self):
|
22
|
+
|
23
|
+
result = _get_available_resources_per_node()
|
24
|
+
|
25
|
+
self.assertIsNotNone(result)
|
deltacat/utils/placement.py
CHANGED
@@ -2,6 +2,7 @@ import logging
|
|
2
2
|
import re
|
3
3
|
import time
|
4
4
|
from dataclasses import dataclass
|
5
|
+
from packaging.version import Version
|
5
6
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
6
7
|
|
7
8
|
import ray
|
@@ -19,6 +20,16 @@ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
|
19
20
|
# Issue: https://github.com/ray-project/ray/issues/29959
|
20
21
|
|
21
22
|
|
23
|
+
def _get_available_resources_per_node():
|
24
|
+
# This API changed after this commit
|
25
|
+
# https://github.com/ray-project/ray/pull/43252
|
26
|
+
# TODO: Use this method from a durable State API once it's available
|
27
|
+
if Version(ray.__version__) >= Version("2.10.0"):
|
28
|
+
return ray._private.state.available_resources_per_node()
|
29
|
+
else:
|
30
|
+
return ray._private.state.state._available_resources_per_node()
|
31
|
+
|
32
|
+
|
22
33
|
@dataclass
|
23
34
|
class PlacementGroupConfig:
|
24
35
|
def __init__(self, opts, resource, node_ips):
|
@@ -90,9 +101,7 @@ class NodeGroupManager:
|
|
90
101
|
Returns:
|
91
102
|
group_res: a dict of resources, e.g., {'CPU':0,'memory':0,'object_store_memory':0}
|
92
103
|
"""
|
93
|
-
all_available_resources = (
|
94
|
-
ray._private.state.state._available_resources_per_node()
|
95
|
-
)
|
104
|
+
all_available_resources = _get_available_resources_per_node()
|
96
105
|
group_keys = [x[0] for x in self.init_groups]
|
97
106
|
group_res = {}
|
98
107
|
for k in group_keys:
|
@@ -127,9 +136,7 @@ class NodeGroupManager:
|
|
127
136
|
Returns:
|
128
137
|
group_res: dict of updated resource(cpu, memory, object store memory) for a given group
|
129
138
|
"""
|
130
|
-
all_available_resources = (
|
131
|
-
ray._private.state.state._available_resources_per_node()
|
132
|
-
)
|
139
|
+
all_available_resources = _get_available_resources_per_node()
|
133
140
|
group_res = {"CPU": 0, "memory": 0, "object_store_memory": 0, "node_id": []}
|
134
141
|
for v in all_available_resources.values():
|
135
142
|
keys = v.keys()
|
@@ -285,7 +292,7 @@ def _config(
|
|
285
292
|
for bd in bundles:
|
286
293
|
node_ids.append(bd["node_id"])
|
287
294
|
# query available resources given list of node id
|
288
|
-
all_nodes_available_res = ray._private.state.state._available_resources_per_node()
|
295
|
+
all_nodes_available_res = _get_available_resources_per_node()
|
289
296
|
pg_res = {"CPU": 0, "memory": 0, "object_store_memory": 0}
|
290
297
|
node_ips = []
|
291
298
|
for node_id in node_ids:
|
{deltacat-1.1.3.dist-info → deltacat-1.1.4.dist-info}/RECORD
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
deltacat/__init__.py,sha256=
|
1
|
+
deltacat/__init__.py,sha256=bZ2r6PnxfaWRxylN1GFZYEkxvWqx381U-7XuK3NEjq0,1777
|
2
2
|
deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
|
3
3
|
deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
|
4
|
-
deltacat/logs.py,sha256=
|
4
|
+
deltacat/logs.py,sha256=6g16VkEFidbaMjgenAjggE1r2l664drMVhreRs8B1IQ,8438
|
5
5
|
deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
deltacat/aws/clients.py,sha256=VgddlV3AEjlBGIFmhhHxokYzwJ-lXnmHAeprVyADduI,6948
|
7
|
-
deltacat/aws/constants.py,sha256=
|
8
|
-
deltacat/aws/s3u.py,sha256=
|
7
|
+
deltacat/aws/constants.py,sha256=aAhOKeLVgtpekA3h9otHUrHqY2bLDWs2QlL7GrdI63g,352
|
8
|
+
deltacat/aws/s3u.py,sha256=lgoE6es6N4xfzwyydxmVspROP1hrNfanB6JqjyBoRb4,24859
|
9
9
|
deltacat/aws/redshift/__init__.py,sha256=7SvjG-dqox8zZUhFicTsUvpG5vXYDl_QQ3ohlHOgTKc,342
|
10
10
|
deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
deltacat/aws/redshift/model/manifest.py,sha256=ThgpdwzaWz493Zz9e8HSWwuxEheA1nDuypM3pe4vozk,12987
|
@@ -46,12 +46,12 @@ deltacat/compute/compactor/steps/repartition.py,sha256=_ITw4yvvnNv3wwOYxprzlIz5J
|
|
46
46
|
deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
47
|
deltacat/compute/compactor/utils/io.py,sha256=S-JZdjETP_tHblK4j860jLHyX9S6A87BPz3Rl0jGbRM,17303
|
48
48
|
deltacat/compute/compactor/utils/primary_key_index.py,sha256=ay2-7t4mP9I_l5gKkrv5h5_r8Icts8mBcbH7OJBknrY,2435
|
49
|
-
deltacat/compute/compactor/utils/round_completion_file.py,sha256
|
49
|
+
deltacat/compute/compactor/utils/round_completion_file.py,sha256=-j6ZzhJBDrJ6Vz6WKEC-yKcNElkKRfO6S0P2JdJDQD0,2345
|
50
50
|
deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
|
51
51
|
deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
|
52
52
|
deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
53
|
-
deltacat/compute/compactor_v2/compaction_session.py,sha256=
|
54
|
-
deltacat/compute/compactor_v2/constants.py,sha256=
|
53
|
+
deltacat/compute/compactor_v2/compaction_session.py,sha256=hC0zde_3jq9W9bO8Z1XwsziK33qGeCFfyqFp1ZRWBJI,25193
|
54
|
+
deltacat/compute/compactor_v2/constants.py,sha256=jGLEK5uS7AcnoVjPGUDIO4ljDbBYZlqzQleKJRKvnZM,2118
|
55
55
|
deltacat/compute/compactor_v2/deletes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
56
|
deltacat/compute/compactor_v2/deletes/delete_file_envelope.py,sha256=AeuH9JRMwp6mvQf6P2cqL92hUEtResQq6qUTS0kIKac,3111
|
57
57
|
deltacat/compute/compactor_v2/deletes/delete_strategy.py,sha256=SMEJOxR-5r92kvKNqtu2w6HmwtmhljcZX1wcNEuS-4w,2833
|
@@ -72,7 +72,7 @@ deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=rNKZisxGrLQOkw
|
|
72
72
|
deltacat/compute/compactor_v2/utils/dedupe.py,sha256=62tFCY2iRP7I3-45GCIYs6_SJsQl8C5lBEr8gbNfbsw,1932
|
73
73
|
deltacat/compute/compactor_v2/utils/delta.py,sha256=8hjkDeIIkSX-gAQ2utQSp2sZcO2tWZHMTxpFusZwBHw,3635
|
74
74
|
deltacat/compute/compactor_v2/utils/io.py,sha256=autXlE3uHICdCCuJoS7mfdeJbRRiz2_xlz-3izlccB4,5264
|
75
|
-
deltacat/compute/compactor_v2/utils/merge.py,sha256=
|
75
|
+
deltacat/compute/compactor_v2/utils/merge.py,sha256=hK4Y7acrtgfvWWTz-fAGznEg6qn6dBYu8blQUQVHhs0,5244
|
76
76
|
deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=MAscmL35WfwN7Is72aFlD_cGhxtZgjRwwR5kS9Yn2uU,11393
|
77
77
|
deltacat/compute/compactor_v2/utils/task_options.py,sha256=MCY0Sz5NCgNMaY92W8p87FvvDB91mnPQ4AhL8ix3BiA,13780
|
78
78
|
deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
|
@@ -133,8 +133,10 @@ deltacat/storage/model/table.py,sha256=IOu1ZOrdRkVDB-FOxYMRvnNf5TukIDfbdHWTqHYN_
|
|
133
133
|
deltacat/storage/model/table_version.py,sha256=cOM9dN-YB_Hhi4h1CzFbldC5qRkm4C1rQ3rpKIZzCNs,7413
|
134
134
|
deltacat/storage/model/types.py,sha256=hj7MmjjVmKT-R9sMUulOWG-FByGZKKaYXNnOWW32mP0,1608
|
135
135
|
deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
136
|
+
deltacat/tests/test_logs.py,sha256=6BEMw8VApFg2msFwCAVosz8NWJYATtX5furHyz8UluM,3828
|
136
137
|
deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
137
138
|
deltacat/tests/aws/test_clients.py,sha256=23GMWfz27WWBDXSqphG9mfputsyS7j3I5P_HRk4YoKE,3790
|
139
|
+
deltacat/tests/aws/test_s3u.py,sha256=QflXbR94o7WobGBm6jhQDK5lJJD2Pd9z2uvi4J7WEJg,437
|
138
140
|
deltacat/tests/catalog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
139
141
|
deltacat/tests/catalog/test_default_catalog_impl.py,sha256=9srCU5yQ159oZ9_PoJ_mWMzVUW5bKV0mnmPJc5zKCQQ,3125
|
140
142
|
deltacat/tests/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -175,6 +177,7 @@ deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
175
177
|
deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iyzjiJ7-Rcl0o,1506
|
176
178
|
deltacat/tests/utils/test_daft.py,sha256=Xal84zR42rXsWQI3lImdDYWOzewomKmhmiUQ59m67V0,6488
|
177
179
|
deltacat/tests/utils/test_metrics.py,sha256=Ym9nOz1EtB180pLmvugihj1sDTNDMb5opIjjr5Nmcls,16339
|
180
|
+
deltacat/tests/utils/test_placement.py,sha256=g61wVOMkHe4YJeR9Oxg_BOVQ6bhHHbC3IBYv8YhUu94,597
|
178
181
|
deltacat/tests/utils/test_pyarrow.py,sha256=eZAuYp9MUf8lmpIilH57JkURuNsTGZ3IAGC4Gm5hdrM,17307
|
179
182
|
deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
|
180
183
|
deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
|
@@ -192,7 +195,7 @@ deltacat/utils/metrics.py,sha256=HYKyZSrtVLu8gXezg_TMNUKJp4h1WWI0VEzn0Xlzf-I,107
|
|
192
195
|
deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
|
193
196
|
deltacat/utils/pandas.py,sha256=GfwjYb8FUSEeoBdXZI1_NJkdjxPMbCCUhlyRfGbDkn8,9562
|
194
197
|
deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
|
195
|
-
deltacat/utils/placement.py,sha256=
|
198
|
+
deltacat/utils/placement.py,sha256=Lj20fb-eq8rgMdm_M2MBMfDLwhDM1sS1nJj2DvIK56s,12060
|
196
199
|
deltacat/utils/pyarrow.py,sha256=gYcoRhQoBoAFo69WNijMobrLGta4VASg8VarWPiB34Y,28979
|
197
200
|
deltacat/utils/resources.py,sha256=Ax1OgLLbZI4oYpp4Ki27OLaST-7I-AJgZwU87FVfY8g,8253
|
198
201
|
deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
|
@@ -203,8 +206,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
|
|
203
206
|
deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
|
204
207
|
deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
|
205
208
|
deltacat/utils/ray_utils/runtime.py,sha256=5eaBWTDm0IXVoc5Y6aacoVB-f0Mnv-K2ewyTSjHKHwM,5009
|
206
|
-
deltacat-1.1.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
207
|
-
deltacat-1.1.
|
208
|
-
deltacat-1.1.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
209
|
-
deltacat-1.1.3.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
210
|
-
deltacat-1.1.3.dist-info/RECORD,,
|
209
|
+
deltacat-1.1.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
210
|
+
deltacat-1.1.4.dist-info/METADATA,sha256=q6kOOCDYRqloN7Fz1n87rkl3LlVRoJR32c1a5dBAkhQ,1780
|
211
|
+
deltacat-1.1.4.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
212
|
+
deltacat-1.1.4.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
213
|
+
deltacat-1.1.4.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|