deltacat 1.1.3__py3-none-any.whl → 1.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/constants.py +0 -4
- deltacat/aws/s3u.py +26 -5
- deltacat/compute/compactor/utils/round_completion_file.py +3 -0
- deltacat/compute/compactor_v2/compaction_session.py +26 -5
- deltacat/compute/compactor_v2/constants.py +0 -3
- deltacat/compute/compactor_v2/utils/merge.py +0 -3
- deltacat/compute/compactor_v2/utils/task_options.py +4 -0
- deltacat/logs.py +126 -69
- deltacat/tests/aws/test_s3u.py +12 -0
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +30 -0
- deltacat/tests/compute/compact_partition_test_cases.py +62 -1
- deltacat/tests/compute/test_compact_partition_incremental.py +34 -9
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +6 -1
- deltacat/tests/compute/test_util_common.py +4 -4
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +18 -7
- deltacat/tests/test_logs.py +127 -0
- deltacat/tests/utils/test_placement.py +25 -0
- deltacat/utils/daft.py +2 -0
- deltacat/utils/placement.py +14 -7
- {deltacat-1.1.3.dist-info → deltacat-1.1.5.dist-info}/METADATA +2 -2
- {deltacat-1.1.3.dist-info → deltacat-1.1.5.dist-info}/RECORD +25 -22
- {deltacat-1.1.3.dist-info → deltacat-1.1.5.dist-info}/LICENSE +0 -0
- {deltacat-1.1.3.dist-info → deltacat-1.1.5.dist-info}/WHEEL +0 -0
- {deltacat-1.1.3.dist-info → deltacat-1.1.5.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
deltacat/aws/constants.py
CHANGED
@@ -6,7 +6,3 @@ DAFT_MAX_S3_CONNECTIONS_PER_FILE = env_integer("DAFT_MAX_S3_CONNECTIONS_PER_FILE
 BOTO_MAX_RETRIES = env_integer("BOTO_MAX_RETRIES", 5)
 TIMEOUT_ERROR_CODES: List[str] = ["ReadTimeoutError", "ConnectTimeoutError"]
 AWS_REGION = env_string("AWS_REGION", "us-east-1")
-
-# Metric Names
-DOWNLOAD_MANIFEST_ENTRY_METRIC_PREFIX = "download_manifest_entry"
-UPLOAD_SLICED_TABLE_METRIC_PREFIX = "upload_sliced_table"
deltacat/aws/s3u.py
CHANGED
@@ -27,8 +27,6 @@ import deltacat.aws.clients as aws_utils
 from deltacat import logs
 from deltacat.aws.constants import (
     TIMEOUT_ERROR_CODES,
-    DOWNLOAD_MANIFEST_ENTRY_METRIC_PREFIX,
-    UPLOAD_SLICED_TABLE_METRIC_PREFIX,
 )
 from deltacat.exceptions import NonRetryableError, RetryableError
 from deltacat.storage import (
@@ -54,7 +52,6 @@ from deltacat.types.tables import (
 )
 from deltacat.types.partial_download import PartialFileDownloadParams
 from deltacat.utils.common import ReadKwargsProvider
-from deltacat.utils.metrics import metrics

 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))

@@ -121,6 +118,32 @@ class UuidBlockWritePathProvider(BlockWritePathProvider):
             self.block_refs.append(block)
         return write_path

+    def __call__(
+        self,
+        base_path: str,
+        *,
+        filesystem: Optional[pa.filesystem.FileSystem] = None,
+        dataset_uuid: Optional[str] = None,
+        block: Optional[ObjectRef[Block]] = None,
+        block_index: Optional[int] = None,
+        file_format: Optional[str] = None,
+    ) -> str:
+        """
+        TODO: BlockWritePathProvider is deprecated as of Ray version 2.20.0. Please use FilenameProvider.
+        See: https://docs.ray.io/en/master/data/api/doc/ray.data.datasource.FilenameProvider.html
+        Also See: https://github.com/ray-project/deltacat/issues/299
+
+        Hence, this class only works with Ray version 2.20.0 or lower when used in Ray Dataset.
+        """
+        return self._get_write_path_for_block(
+            base_path,
+            filesystem=filesystem,
+            dataset_uuid=dataset_uuid,
+            block=block,
+            block_index=block_index,
+            file_format=file_format,
+        )
+

 class S3Url:
     def __init__(self, url: str):
@@ -243,7 +266,6 @@ def read_file(
         raise e


-@metrics(prefix=UPLOAD_SLICED_TABLE_METRIC_PREFIX)
 def upload_sliced_table(
     table: Union[LocalTable, DistributedDataset],
     s3_url_prefix: str,
@@ -352,7 +374,6 @@ def upload_table(
     return manifest_entries


-@metrics(prefix=DOWNLOAD_MANIFEST_ENTRY_METRIC_PREFIX)
 def download_manifest_entry(
     manifest_entry: ManifestEntry,
     token_holder: Optional[Dict[str, Any]] = None,
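For illustration (not part of this diff), the restored __call__ entry point on UuidBlockWritePathProvider can be exercised directly; this mirrors the unit test added in deltacat/tests/aws/test_s3u.py at the end of this diff, and per the docstring above it only applies when used with Ray Datasets on Ray 2.20.0 or lower.

# Illustrative sketch only; the class and constructor usage are taken from the diff above.
from deltacat.aws.s3u import CapturedBlockWritePaths, UuidBlockWritePathProvider

capture_object = CapturedBlockWritePaths()
provider = UuidBlockWritePathProvider(capture_object=capture_object)

# __call__ delegates to _get_write_path_for_block and returns "{base_path}/{uuid}".
write_path = provider("base_path")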
deltacat/compute/compactor/utils/round_completion_file.py
CHANGED
@@ -6,6 +6,7 @@ from deltacat.compute.compactor import RoundCompletionInfo
 from deltacat.storage import PartitionLocator
 from deltacat.aws import s3u as s3_utils
 from typing import Optional
+from deltacat.utils.metrics import metrics

 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))

@@ -18,6 +19,7 @@ def get_round_completion_file_s3_url(
     return f"{base_url}.json"


+@metrics
 def read_round_completion_file(
     bucket: str,
     source_partition_locator: PartitionLocator,
@@ -38,6 +40,7 @@ def read_round_completion_file(
     return round_completion_info


+@metrics
 def write_round_completion_file(
     bucket: Optional[str],
     source_partition_locator: Optional[PartitionLocator],
deltacat/compute/compactor_v2/compaction_session.py
CHANGED
@@ -17,7 +17,11 @@ from deltacat.compute.compactor_v2.model.merge_input import MergeInput
 from deltacat.aws import s3u as s3_utils
 import deltacat
 from deltacat import logs
-from deltacat.compute.compactor import
+from deltacat.compute.compactor import (
+    HighWatermark,
+    PyArrowWriteResult,
+    RoundCompletionInfo,
+)
 from deltacat.compute.compactor_v2.model.merge_result import MergeResult
 from deltacat.compute.compactor_v2.model.hash_bucket_result import HashBucketResult
 from deltacat.compute.compactor.model.materialize_result import MaterializeResult
@@ -37,6 +41,7 @@ from deltacat.compute.compactor_v2.deletes.utils import prepare_deletes
 from deltacat.storage import (
     Delta,
     DeltaLocator,
+    Manifest,
     Partition,
 )
 from deltacat.compute.compactor.model.compact_partition_params import (
@@ -50,6 +55,7 @@ from deltacat.compute.compactor_v2.steps import merge as mg
 from deltacat.compute.compactor_v2.steps import hash_bucket as hb
 from deltacat.compute.compactor_v2.utils import io
 from deltacat.compute.compactor.utils import round_completion_file as rcf
+from deltacat.utils.metrics import metrics

 from typing import List, Optional, Tuple
 from collections import defaultdict
@@ -73,6 +79,7 @@ if importlib.util.find_spec("memray"):
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))


+@metrics
 def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]:

     assert (
@@ -94,7 +101,7 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
     round_completion_file_s3_url = None
     if new_partition:
         logger.info(f"Committing compacted partition to: {new_partition.locator}")
-        partition = params.deltacat_storage.commit_partition(
+        partition: Partition = params.deltacat_storage.commit_partition(
             new_partition, **params.deltacat_storage_kwargs
         )
         logger.info(f"Committed compacted partition: {partition}")
@@ -148,9 +155,9 @@ def _execute_compaction(
     compaction_audit.set_total_cluster_memory_bytes(cluster_memory)

     # read the results from any previously completed compaction round
-    round_completion_info = None
-    high_watermark = None
-    previous_compacted_delta_manifest = None
+    round_completion_info: Optional[RoundCompletionInfo] = None
+    high_watermark: Optional[HighWatermark] = None
+    previous_compacted_delta_manifest: Optional[Manifest] = None

     if not params.rebase_source_partition_locator:
         round_completion_info = rcf.read_round_completion_file(
@@ -269,6 +276,7 @@ def _execute_compaction(
     total_hb_record_count = np.int64(0)
     telemetry_time_hb = 0
     if params.hash_bucket_count == 1:
+        logger.info("Hash bucket count set to 1. Running local merge")
         merge_start = time.monotonic()
         local_merge_input = generate_local_merge_input(
             params,
@@ -632,6 +640,19 @@ def _execute_compaction(
         f"partition-{params.source_partition_locator.partition_values},"
         f"compacted at: {params.last_stream_position_to_compact},"
     )
+    is_inplace_compacted: bool = (
+        params.source_partition_locator.partition_values
+        == params.destination_partition_locator.partition_values
+        and params.source_partition_locator.stream_id
+        == params.destination_partition_locator.stream_id
+    )
+    if is_inplace_compacted:
+        logger.info(
+            "Overriding round completion file source partition locator as in-place compacted. "
+            + f"Got compacted partition partition_id of {compacted_partition.locator.partition_id} "
+            f"and rcf source partition_id of {rcf_source_partition_locator.partition_id}."
+        )
+        rcf_source_partition_locator = compacted_partition.locator
     return (
         compacted_partition,
         new_round_completion_info,
deltacat/compute/compactor_v2/utils/merge.py
CHANGED
@@ -31,14 +31,11 @@ from deltacat.compute.compactor_v2.deletes.delete_strategy import (
 from deltacat.compute.compactor_v2.deletes.delete_file_envelope import (
     DeleteFileEnvelope,
 )
-from deltacat.utils.metrics import metrics
-from deltacat.compute.compactor_v2.constants import MATERIALIZE_METRIC_PREFIX


 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))


-@metrics(prefix=MATERIALIZE_METRIC_PREFIX)
 def materialize(
     input: MergeInput,
     task_index: int,
deltacat/compute/compactor_v2/utils/task_options.py
CHANGED
@@ -21,6 +21,9 @@ from deltacat.compute.compactor_v2.constants import (
     PARQUET_TO_PYARROW_INFLATION,
 )

+from daft.exceptions import DaftTransientError
+
+
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))


@@ -76,6 +79,7 @@ def get_task_options(
         botocore.exceptions.HTTPClientError,
         ConnectionError,
         TimeoutError,
+        DaftTransientError,
     ]

     return task_opts
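This change adds Daft's transient error type to the list of exception classes that compaction task options treat as retryable. A minimal sketch of the underlying Ray mechanism is shown below; it assumes (as the surrounding code suggests, but the diff does not show) that the list is ultimately passed to Ray as retry_exceptions, and the task name is hypothetical.

# Illustrative sketch only; not deltacat code.
import ray
from daft.exceptions import DaftTransientError


# Ray retries the task on the listed exceptions instead of failing the whole
# run on a transient Daft/S3 hiccup.
@ray.remote(max_retries=3, retry_exceptions=[ConnectionError, TimeoutError, DaftTransientError])
def flaky_read_task() -> int:
    return 42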
deltacat/logs.py
CHANGED
@@ -1,5 +1,6 @@
 import logging
 import os
+import json
 import pathlib
 from logging import FileHandler, Handler, Logger, LoggerAdapter, handlers
 from typing import Any, Dict, Optional, Union
@@ -19,13 +20,106 @@ from deltacat.constants import (
 )

 DEFAULT_LOG_LEVEL = "INFO"
-DEFAULT_LOG_FORMAT =
-"
-
+DEFAULT_LOG_FORMAT = {
+    "level": "levelname",
+    "message": "message",
+    "loggerName": "name",
+    "processName": "processName",
+    "processID": "process",
+    "threadName": "threadName",
+    "timestamp": "asctime",
+    "filename": "filename",
+    "lineno": "lineno",
+}
 DEFAULT_MAX_BYTES_PER_LOG = 2 ^ 20 * 256  # 256 MiB
 DEFAULT_BACKUP_COUNT = 0


+class JsonFormatter(logging.Formatter):
+    """
+    Formatter that outputs JSON strings after parsing the LogRecord.
+
+    @param dict fmt_dict: Key: logging format attribute pairs. Defaults to {"message": "message"}.
+    @param str time_format: time.strftime() format string. Default: "%Y-%m-%dT%H:%M:%S"
+    @param str msec_format: Microsecond formatting. Appended at the end. Default: "%s.%03dZ"
+    """
+
+    def __init__(
+        self,
+        fmt_dict: dict = None,
+        time_format: str = "%Y-%m-%dT%H:%M:%S",
+        msec_format: str = "%s.%03dZ",
+    ):
+        self.fmt_dict = fmt_dict if fmt_dict is not None else {"message": "message"}
+        self.default_time_format = time_format
+        self.default_msec_format = msec_format
+        self.datefmt = None
+        if ray.is_initialized():
+            self.ray_runtime_ctx: RuntimeContext = ray.get_runtime_context()
+            self.context = {}
+            self.context["worker_id"] = self.ray_runtime_ctx.get_worker_id()
+            self.context["node_id"] = self.ray_runtime_ctx.get_node_id()
+            self.context["job_id"] = self.ray_runtime_ctx.get_job_id()
+        else:
+            self.ray_runtime_ctx = None
+            self.context = {}
+
+    def usesTime(self) -> bool:
+        """
+        Overwritten to look for the attribute in the format dict values instead of the fmt string.
+        """
+        return "asctime" in self.fmt_dict.values()
+
+    def formatMessage(self, record) -> dict:
+        """
+        Overwritten to return a dictionary of the relevant LogRecord attributes instead of a string.
+        KeyError is raised if an unknown attribute is provided in the fmt_dict.
+        """
+        return {
+            fmt_key: record.__dict__[fmt_val]
+            for fmt_key, fmt_val in self.fmt_dict.items()
+        }
+
+    def format(self, record) -> str:
+        """
+        Mostly the same as the parent's class method, the difference being that a dict is manipulated and dumped as JSON
+        instead of a string.
+        """
+        record.message = record.getMessage()
+
+        if self.usesTime():
+            record.asctime = self.formatTime(record, self.datefmt)
+
+        message_dict = self.formatMessage(record)
+
+        if record.exc_info:
+            # Cache the traceback text to avoid converting it multiple times
+            # (it's constant anyway)
+            if not record.exc_text:
+                record.exc_text = self.formatException(record.exc_info)
+
+        if record.exc_text:
+            message_dict["exc_info"] = record.exc_text
+
+        if record.stack_info:
+            message_dict["stack_info"] = self.formatStack(record.stack_info)
+
+        if self.ray_runtime_ctx:
+            # only workers will have task ID
+            if (
+                self.ray_runtime_ctx.worker
+                and self.ray_runtime_ctx.worker.mode == ray._private.worker.WORKER_MODE
+            ):
+                self.context["task_id"] = self.ray_runtime_ctx.get_task_id()
+                self.context[
+                    "assigned_resources"
+                ] = self.ray_runtime_ctx.get_assigned_resources()
+
+            message_dict["ray_runtime_context"] = self.context
+
+        return json.dumps(message_dict, default=str)
+
+
 class DeltaCATLoggerAdapter(logging.LoggerAdapter):
     """
     Logger Adapter class with additional functionality
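With this change, deltacat's rotating file handlers emit structured JSON lines instead of a plain format string. A minimal sketch (not part of the diff) of how the new JsonFormatter renders a record using the DEFAULT_LOG_FORMAT mapping shown above:

# Illustrative sketch only; JsonFormatter and DEFAULT_LOG_FORMAT come from the
# deltacat/logs.py changes above, and the logger name here is arbitrary.
import logging

from deltacat.logs import DEFAULT_LOG_FORMAT, JsonFormatter

handler = logging.StreamHandler()
handler.setFormatter(JsonFormatter(DEFAULT_LOG_FORMAT))

example_logger = logging.getLogger("json_formatter_example")
example_logger.addHandler(handler)
example_logger.setLevel(logging.INFO)

# Emits one JSON object per record, e.g.:
# {"level": "INFO", "message": "hello", "loggerName": "json_formatter_example", ...}
example_logger.info("hello")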
@@ -51,54 +145,6 @@ class DeltaCATLoggerAdapter(logging.LoggerAdapter):
         self.error(msg, *args, **kwargs)


-class RayRuntimeContextLoggerAdapter(DeltaCATLoggerAdapter):
-    """
-    Logger Adapter for injecting Ray Runtime Context into logging messages.
-    """
-
-    def __init__(self, logger: Logger, runtime_context: RuntimeContext):
-        super().__init__(logger, {})
-        self.runtime_context = runtime_context
-
-    def process(self, msg, kwargs):
-        """
-        Injects Ray Runtime Context details into each log message.
-
-        This may include information such as the raylet node ID, task/actor ID, job ID,
-        placement group ID of the worker, and assigned resources to the task/actor.
-
-        Args:
-            msg: The original log message
-            kwargs: Keyword arguments for the log message
-
-        Returns: A log message with Ray Runtime Context details
-
-        """
-        runtime_context_dict = self.runtime_context.get()
-        runtime_context_dict[
-            "worker_id"
-        ] = self.runtime_context.worker.core_worker.get_worker_id()
-        if self.runtime_context.get_task_id() or self.runtime_context.get_actor_id():
-            runtime_context_dict[
-                "pg_id"
-            ] = self.runtime_context.get_placement_group_id()
-            runtime_context_dict[
-                "assigned_resources"
-            ] = self.runtime_context.get_assigned_resources()
-
-        return "(ray_runtime_context=%s) -- %s" % (runtime_context_dict, msg), kwargs
-
-    def __reduce__(self):
-        """
-        Used to unpickle the class during Ray object store transfer.
-        """
-
-        def deserializer(*args):
-            return RayRuntimeContextLoggerAdapter(args[0], ray.get_runtime_context())
-
-        return deserializer, (self.logger,)
-
-
 def _add_logger_handler(logger: Logger, handler: Handler) -> Logger:

     logger.setLevel(logging.getLevelName("DEBUG"))
@@ -109,10 +155,10 @@ def _add_logger_handler(logger: Logger, handler: Handler) -> Logger:
 def _create_rotating_file_handler(
     log_directory: str,
     log_base_file_name: str,
-    logging_level: str = DEFAULT_LOG_LEVEL,
+    logging_level: Union[str, int] = DEFAULT_LOG_LEVEL,
     max_bytes_per_log_file: int = DEFAULT_MAX_BYTES_PER_LOG,
     backup_count: int = DEFAULT_BACKUP_COUNT,
-    logging_format: str = DEFAULT_LOG_FORMAT,
+    logging_format: Union[str, dict] = DEFAULT_LOG_FORMAT,
 ) -> FileHandler:

     if type(logging_level) is str:
@@ -126,7 +172,12 @@ def _create_rotating_file_handler(
         maxBytes=max_bytes_per_log_file,
         backupCount=backup_count,
     )
-
+
+    if type(logging_format) is str:
+        handler.setFormatter(logging.Formatter(logging_format))
+    else:
+        handler.setFormatter(JsonFormatter(logging_format))
+
     handler.setLevel(logging_level)
     return handler

@@ -135,7 +186,8 @@ def _file_handler_exists(logger: Logger, log_dir: str, log_base_file_name: str)

     handler_exists = False
     base_file_path = os.path.join(log_dir, log_base_file_name)
-
+
+    if logger.handlers:
         norm_base_file_path = os.path.normpath(base_file_path)
         handler_exists = any(
             [
@@ -149,49 +201,54 @@ def _file_handler_exists(logger: Logger, log_dir: str, log_base_file_name: str)

 def _configure_logger(
     logger: Logger,
-    log_level:
+    log_level: int,
     log_dir: str,
     log_base_file_name: str,
     debug_log_base_file_name: str,
 ) -> Union[Logger, LoggerAdapter]:
+    # This maintains log level of rotating file handlers
     primary_log_level = log_level
     logger.propagate = False
-    if log_level.
+    if log_level <= logging.getLevelName("DEBUG"):
         if not _file_handler_exists(logger, log_dir, debug_log_base_file_name):
             handler = _create_rotating_file_handler(
                 log_dir, debug_log_base_file_name, "DEBUG"
             )
             _add_logger_handler(logger, handler)
-            primary_log_level = "INFO"
+            primary_log_level = logging.getLevelName("INFO")
     if not _file_handler_exists(logger, log_dir, log_base_file_name):
         handler = _create_rotating_file_handler(
             log_dir, log_base_file_name, primary_log_level
         )
         _add_logger_handler(logger, handler)
-    if ray.is_initialized():
-        ray_runtime_ctx = ray.get_runtime_context()
-        if ray_runtime_ctx.worker.connected:
-            logger = RayRuntimeContextLoggerAdapter(logger, ray_runtime_ctx)
-        else:
-            logger = DeltaCATLoggerAdapter(logger)

-    return logger
+    return DeltaCATLoggerAdapter(logger)
+

+def configure_deltacat_logger(
+    logger: Logger, level: int = None
+) -> Union[Logger, LoggerAdapter]:
+    if level is None:
+        level = logging.getLevelName(DELTACAT_SYS_LOG_LEVEL)

-def configure_deltacat_logger(logger: Logger) -> Union[Logger, LoggerAdapter]:
     return _configure_logger(
         logger,
-
+        level,
         DELTACAT_SYS_LOG_DIR,
         DELTACAT_SYS_INFO_LOG_BASE_FILE_NAME,
         DELTACAT_SYS_DEBUG_LOG_BASE_FILE_NAME,
     )


-def configure_application_logger(
+def configure_application_logger(
+    logger: Logger, level: int = None
+) -> Union[Logger, LoggerAdapter]:
+    if level is None:
+        level = logging.getLevelName(DELTACAT_APP_LOG_LEVEL)
+
     return _configure_logger(
         logger,
-
+        level,
         DELTACAT_APP_LOG_DIR,
         DELTACAT_APP_INFO_LOG_BASE_FILE_NAME,
         DELTACAT_APP_DEBUG_LOG_BASE_FILE_NAME,
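Taken together, the logging changes drop the Ray-specific logger adapter (Ray context is now attached by JsonFormatter instead) and let callers pass an explicit numeric level to both configure helpers. A minimal usage sketch (not part of the diff; the "my_app" logger name is hypothetical):

# Illustrative sketch only; signatures are taken from the diff above.
import logging

from deltacat import logs

# Falls back to DELTACAT_SYS_LOG_LEVEL when no level is passed.
sys_logger = logs.configure_deltacat_logger(logging.getLogger(__name__))

# Explicit numeric override, e.g. for local debugging.
app_logger = logs.configure_application_logger(
    logging.getLogger("my_app"), level=logging.DEBUG
)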
deltacat/tests/aws/test_s3u.py
ADDED
@@ -0,0 +1,12 @@
+import unittest
+from deltacat.aws.s3u import UuidBlockWritePathProvider, CapturedBlockWritePaths
+
+
+class TestUuidBlockWritePathProvider(unittest.TestCase):
+    def test_uuid_block_write_provider_sanity(self):
+        capture_object = CapturedBlockWritePaths()
+        provider = UuidBlockWritePathProvider(capture_object=capture_object)
+
+        result = provider("base_path")
+
+        self.assertRegex(result, r"^base_path/[\w-]{36}$")