datashare-python 0.7.3__tar.gz → 0.8.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datashare_python-0.7.3 → datashare_python-0.8.2}/PKG-INFO +1 -1
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/config.py +11 -2
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/conftest.py +2 -1
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/dependencies.py +1 -1
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/discovery.py +2 -3
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/logging_.py +68 -15
- datashare_python-0.8.2/datashare_python/worker-template.tar.gz +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/pyproject.toml +1 -1
- datashare_python-0.7.3/datashare_python/worker-template.tar.gz +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/.gitignore +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/README.md +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/.gitignore +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/__init__.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/__main__.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/__init__.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/project.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/task.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/utils.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/cli/worker.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/constants.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/exceptions.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/interceptors.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/objects.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/task_client.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/template.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/types_.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/utils.py +0 -0
- {datashare_python-0.7.3 → datashare_python-0.8.2}/datashare_python/worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datashare-python
|
|
3
|
-
Version: 0.7.3
|
|
3
|
+
Version: 0.8.2
|
|
4
4
|
Summary: Manage Python tasks and local resources in Datashare
|
|
5
5
|
Project-URL: Homepage, https://icij.github.io/datashare-python/
|
|
6
6
|
Project-URL: Documentation, https://icij.github.io/datashare-python/
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
from enum import StrEnum
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
from typing import Literal
|
|
3
4
|
|
|
@@ -78,13 +79,21 @@ class TemporalClientConfig(BaseModel):
|
|
|
78
79
|
LogLevel = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
|
|
79
80
|
|
|
80
81
|
|
|
82
|
+
class LogFormat(StrEnum):
|
|
83
|
+
JSON = "json"
|
|
84
|
+
LOGFMT = "logfmt"
|
|
85
|
+
DEFAULT = "default"
|
|
86
|
+
|
|
87
|
+
|
|
81
88
|
class LoggingConfig(BaseModel):
|
|
82
|
-
|
|
89
|
+
format: LogFormat = LogFormat.DEFAULT
|
|
83
90
|
loggers: dict[str, LogLevel]
|
|
84
91
|
|
|
85
92
|
|
|
86
93
|
_DEFAULT_LOGGERS = {datashare_python.__name__: "INFO"}
|
|
87
|
-
_DEFAULT_LOGGING_CONFIG = LoggingConfig(
|
|
94
|
+
_DEFAULT_LOGGING_CONFIG = LoggingConfig(
|
|
95
|
+
format=LogFormat.DEFAULT, loggers=_DEFAULT_LOGGERS
|
|
96
|
+
)
|
|
88
97
|
|
|
89
98
|
|
|
90
99
|
class WorkerConfig(ICIJSettings, BaseModel):
|
|
@@ -13,6 +13,7 @@ from temporalio import workflow
|
|
|
13
13
|
|
|
14
14
|
from datashare_python.config import (
|
|
15
15
|
DatashareClientConfig,
|
|
16
|
+
LogFormat,
|
|
16
17
|
LoggingConfig,
|
|
17
18
|
TemporalClientConfig,
|
|
18
19
|
WorkerConfig,
|
|
@@ -93,7 +94,7 @@ def event_loop(
|
|
|
93
94
|
@pytest.fixture(scope="session")
|
|
94
95
|
def test_worker_config() -> WorkerConfig:
|
|
95
96
|
logging_config = LoggingConfig(
|
|
96
|
-
|
|
97
|
+
format=LogFormat.DEFAULT,
|
|
97
98
|
loggers={
|
|
98
99
|
"datashare_python": "DEBUG",
|
|
99
100
|
"icij_common": "DEBUG",
|
|
@@ -40,7 +40,7 @@ def set_loggers(
|
|
|
40
40
|
worker_config: WorkerConfig, worker_id: str, loggers: dict[str, LogLevel]
|
|
41
41
|
) -> None:
|
|
42
42
|
setup_worker_loggers(
|
|
43
|
-
loggers=loggers, worker_id=worker_id,
|
|
43
|
+
loggers=loggers, worker_id=worker_id, log_format=worker_config.logging.format
|
|
44
44
|
)
|
|
45
45
|
logger.info("worker loggers ready to log 💬")
|
|
46
46
|
|
|
@@ -70,9 +70,8 @@ def discover(
|
|
|
70
70
|
deps = []
|
|
71
71
|
if deps_name is not None:
|
|
72
72
|
deps = discover_dependencies(deps_name)
|
|
73
|
-
for
|
|
74
|
-
|
|
75
|
-
deps.append(mandatory)
|
|
73
|
+
missing = [m for m in _MANDATORY_DEPS if m not in deps]
|
|
74
|
+
deps = missing + deps
|
|
76
75
|
if deps:
|
|
77
76
|
n_deps = len(deps)
|
|
78
77
|
discovered += "\n"
|
|
@@ -1,20 +1,39 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import numbers
|
|
2
3
|
import sys
|
|
3
4
|
from copy import copy
|
|
5
|
+
from typing import Any
|
|
4
6
|
|
|
7
|
+
import orjson
|
|
5
8
|
from icij_common.logging_utils import DATE_FMT, STREAM_HANDLER_FMT
|
|
6
|
-
from pythonjsonlogger.core import
|
|
9
|
+
from pythonjsonlogger.core import BaseJsonFormatter
|
|
7
10
|
from pythonjsonlogger.orjson import OrjsonFormatter
|
|
8
11
|
from temporalio import activity, workflow
|
|
9
12
|
|
|
10
|
-
from .config import LogLevel
|
|
13
|
+
from .config import LogFormat, LogLevel
|
|
11
14
|
from .interceptors import get_trace_context
|
|
12
15
|
|
|
16
|
+
_BASE_ATTRS = [
|
|
17
|
+
"asctime",
|
|
18
|
+
"exc_info",
|
|
19
|
+
"filename",
|
|
20
|
+
"funcName",
|
|
21
|
+
"levelname",
|
|
22
|
+
"levelno",
|
|
23
|
+
"lineno",
|
|
24
|
+
"module",
|
|
25
|
+
"msecs",
|
|
26
|
+
"message",
|
|
27
|
+
"msg",
|
|
28
|
+
"name",
|
|
29
|
+
"pathname",
|
|
30
|
+
]
|
|
13
31
|
_ACT_LOGGER_ATTRS = ["activity_type", "activity_id", "activity_run_id"]
|
|
14
32
|
_WF_LOGGED_ATTRS = ["workflow_type", "workflow_id", "workflow_run_id"]
|
|
15
33
|
_TRACE_CONTEXT_ATTRS = ["trace_id", "parent_id", "traceparent"]
|
|
34
|
+
|
|
16
35
|
_LOGGED_ATTRIBUTES = (
|
|
17
|
-
copy(
|
|
36
|
+
copy(_BASE_ATTRS)
|
|
18
37
|
+ _WF_LOGGED_ATTRS
|
|
19
38
|
+ _ACT_LOGGER_ATTRS
|
|
20
39
|
+ _TRACE_CONTEXT_ATTRS
|
|
@@ -28,7 +47,7 @@ _STREAM_HANDLER_FMT_WITH_WORKER_ID = (
|
|
|
28
47
|
|
|
29
48
|
|
|
30
49
|
def setup_worker_loggers(
|
|
31
|
-
loggers: dict[str, LogLevel], *, worker_id: str | None,
|
|
50
|
+
loggers: dict[str, LogLevel], *, worker_id: str | None, log_format: LogFormat
|
|
32
51
|
) -> None:
|
|
33
52
|
worker_filter = WorkerFilter(worker_id)
|
|
34
53
|
for logger_name, level_str in loggers.items():
|
|
@@ -36,7 +55,9 @@ def setup_worker_loggers(
|
|
|
36
55
|
logger = logging.getLogger(logger_name)
|
|
37
56
|
logger.setLevel(level)
|
|
38
57
|
logger.handlers = []
|
|
39
|
-
for handler in _get_worker_handlers(
|
|
58
|
+
for handler in _get_worker_handlers(
|
|
59
|
+
level, worker_filter, log_format=log_format
|
|
60
|
+
):
|
|
40
61
|
logger.addHandler(handler)
|
|
41
62
|
|
|
42
63
|
|
|
@@ -50,8 +71,13 @@ class WorkerFilter(logging.Filter):
|
|
|
50
71
|
record.worker_id = self.worker_id
|
|
51
72
|
if workflow.in_workflow():
|
|
52
73
|
wf_info = workflow.info()
|
|
74
|
+
wf_info = {
|
|
75
|
+
"workflow_run_id": wf_info.run_id,
|
|
76
|
+
"workflow_id": self.workflow_id,
|
|
77
|
+
"workflow_type": self.workflow_type,
|
|
78
|
+
}
|
|
53
79
|
for attr in _WF_LOGGED_ATTRS:
|
|
54
|
-
setattr(record, attr,
|
|
80
|
+
setattr(record, attr, wf_info[attr])
|
|
55
81
|
if activity.in_activity():
|
|
56
82
|
act_info = activity.info()
|
|
57
83
|
for attr in _ACT_LOGGER_ATTRS:
|
|
@@ -64,23 +90,50 @@ class WorkerFilter(logging.Filter):
|
|
|
64
90
|
|
|
65
91
|
|
|
66
92
|
def _get_worker_handlers(
|
|
67
|
-
level: int, worker_filter: WorkerFilter, *,
|
|
93
|
+
level: int, worker_filter: WorkerFilter, *, log_format: LogFormat
|
|
68
94
|
) -> list[logging.Handler]:
|
|
69
95
|
stream_handler = logging.StreamHandler(sys.stderr)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
fmt =
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
96
|
+
match log_format:
|
|
97
|
+
case LogFormat.JSON:
|
|
98
|
+
fmt = _json_formatter(datefmt=DATE_FMT)
|
|
99
|
+
case LogFormat.LOGFMT:
|
|
100
|
+
fmt = LogFmtFormatter(datefmt=DATE_FMT)
|
|
101
|
+
case LogFormat.DEFAULT:
|
|
102
|
+
if worker_filter.worker_id is not None:
|
|
103
|
+
fmt = _STREAM_HANDLER_FMT_WITH_WORKER_ID
|
|
104
|
+
else:
|
|
105
|
+
fmt = STREAM_HANDLER_FMT
|
|
106
|
+
fmt = logging.Formatter(fmt, DATE_FMT)
|
|
107
|
+
case _:
|
|
108
|
+
raise NotImplementedError(f"invalid log format: {log_format}")
|
|
78
109
|
stream_handler.setFormatter(fmt)
|
|
79
110
|
stream_handler.setLevel(level)
|
|
80
111
|
stream_handler.addFilter(worker_filter)
|
|
81
112
|
return [stream_handler]
|
|
82
113
|
|
|
83
114
|
|
|
115
|
+
class LogFmtFormatter(logging.Formatter):
|
|
116
|
+
def format(self, record: logging.LogRecord) -> str:
|
|
117
|
+
logged = dict()
|
|
118
|
+
if record.exc_info and not record.exc_text:
|
|
119
|
+
record.exc_text = self.formatException(record.exc_info)
|
|
120
|
+
logged["exc_info"] = record.exc_text
|
|
121
|
+
for k, v in record.__dict__.items():
|
|
122
|
+
if k in _LOGGED_ATTRIBUTES and k != "exc_info":
|
|
123
|
+
logged[k] = _encode_value(v)
|
|
124
|
+
return " ".join(f"{k}={v}" for k, v in sorted(logged.items()))
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _encode_value(value: Any) -> str:
|
|
128
|
+
if value is None:
|
|
129
|
+
return ""
|
|
130
|
+
if isinstance(value, bool):
|
|
131
|
+
return "true" if value else "false"
|
|
132
|
+
if isinstance(value, numbers.Number):
|
|
133
|
+
return str(value)
|
|
134
|
+
return orjson.dumps(value).decode()
|
|
135
|
+
|
|
136
|
+
|
|
84
137
|
def _json_formatter(datefmt: str) -> BaseJsonFormatter:
|
|
85
138
|
fmt = OrjsonFormatter( # let's keep logging as fast as possible
|
|
86
139
|
_LOGGED_ATTRIBUTES, datefmt=datefmt
|
|
Binary file
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|