datashare-python 0.6.3__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datashare_python-0.6.3 → datashare_python-0.7.1}/PKG-INFO +8 -7
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/config.py +13 -5
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/conftest.py +12 -11
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/dependencies.py +8 -4
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/discovery.py +6 -3
- datashare_python-0.7.1/datashare_python/interceptors.py +198 -0
- datashare_python-0.7.1/datashare_python/logging_.py +86 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/utils.py +2 -0
- datashare_python-0.7.1/datashare_python/worker-template.tar.gz +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/worker.py +25 -1
- {datashare_python-0.6.3 → datashare_python-0.7.1}/pyproject.toml +8 -7
- datashare_python-0.6.3/datashare_python/worker-template.tar.gz +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/.gitignore +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/README.md +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/.gitignore +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/__init__.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/__main__.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/__init__.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/project.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/task.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/utils.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/worker.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/constants.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/exceptions.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/objects.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/task_client.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/template.py +0 -0
- {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/types_.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datashare-python
|
|
3
|
-
Version: 0.6.3
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: Manage Pythoœn tasks and local resources in Datashare
|
|
5
5
|
Project-URL: Homepage, https://icij.github.io/datashare-python/
|
|
6
6
|
Project-URL: Documentation, https://icij.github.io/datashare-python/
|
|
@@ -8,13 +8,14 @@ Project-URL: Repository, https://github.com/ICIJ/datashare-python
|
|
|
8
8
|
Project-URL: Issues, https://github.com/ICIJ/datashare-python/issues
|
|
9
9
|
Author-email: Clément Doumouro <cdoumouro@icij.org>, Clément Doumouro <clement.doumouro@gmail.com>, Lion Summerbell <lsummerbell@icij.org>
|
|
10
10
|
Requires-Python: <4,>=3.11
|
|
11
|
-
Requires-Dist: aiohttp~=3.11
|
|
12
|
-
Requires-Dist: alive-progress~=3.2
|
|
13
|
-
Requires-Dist: hatchling~=1.27
|
|
11
|
+
Requires-Dist: aiohttp~=3.11
|
|
12
|
+
Requires-Dist: alive-progress~=3.2
|
|
13
|
+
Requires-Dist: hatchling~=1.27
|
|
14
14
|
Requires-Dist: icij-common[elasticsearch]~=0.8.2
|
|
15
|
-
Requires-Dist: nest-asyncio~=1.6
|
|
16
|
-
Requires-Dist:
|
|
15
|
+
Requires-Dist: nest-asyncio~=1.6
|
|
16
|
+
Requires-Dist: orjson~=3.11
|
|
17
|
+
Requires-Dist: python-json-logger~=4.0
|
|
17
18
|
Requires-Dist: pyyaml~=6.0
|
|
18
|
-
Requires-Dist: temporalio~=1.23
|
|
19
|
+
Requires-Dist: temporalio~=1.23
|
|
19
20
|
Requires-Dist: tomlkit~=0.14.0
|
|
20
21
|
Requires-Dist: typer~=0.15.4
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Annotated, Literal
|
|
3
3
|
|
|
4
4
|
from icij_common.es import ESClient
|
|
5
5
|
from icij_common.pydantic_utils import ICIJSettings
|
|
@@ -18,7 +18,6 @@ import datashare_python
|
|
|
18
18
|
from .objects import BaseModel
|
|
19
19
|
from .task_client import DatashareTaskClient
|
|
20
20
|
from .types_ import TemporalClient
|
|
21
|
-
from .utils import LogWithWorkerIDMixin
|
|
22
21
|
|
|
23
22
|
_ALL_LOGGERS = [datashare_python.__name__]
|
|
24
23
|
|
|
@@ -76,11 +75,20 @@ class TemporalClientConfig(BaseModel):
|
|
|
76
75
|
return self._client
|
|
77
76
|
|
|
78
77
|
|
|
79
|
-
|
|
78
|
+
LogLevel = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class LoggingConfig(BaseModel):
|
|
82
|
+
log_in_json: bool = False
|
|
83
|
+
loggers: dict[str, LogLevel]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class WorkerConfig(ICIJSettings, BaseModel):
|
|
80
87
|
model_config = DS_WORKER_SETTINGS_CONFIG
|
|
81
88
|
|
|
82
|
-
|
|
83
|
-
|
|
89
|
+
logging: Annotated[LoggingConfig, Field(frozen=True)] = {
|
|
90
|
+
datashare_python.__name__: "INFO"
|
|
91
|
+
}
|
|
84
92
|
|
|
85
93
|
datashare: DatashareClientConfig = DatashareClientConfig()
|
|
86
94
|
elasticsearch: ESClientConfig = ESClientConfig()
|
|
@@ -8,10 +8,12 @@ import nest_asyncio
|
|
|
8
8
|
import pytest
|
|
9
9
|
from elasticsearch._async.helpers import async_streaming_bulk
|
|
10
10
|
from icij_common.es import DOC_ROOT_ID, ES_DOCUMENT_TYPE, ID, ESClient
|
|
11
|
+
from icij_common.test_utils import reset_env # noqa: F401
|
|
11
12
|
from temporalio import workflow
|
|
12
13
|
|
|
13
14
|
from datashare_python.config import (
|
|
14
15
|
DatashareClientConfig,
|
|
16
|
+
LoggingConfig,
|
|
15
17
|
TemporalClientConfig,
|
|
16
18
|
WorkerConfig,
|
|
17
19
|
)
|
|
@@ -19,10 +21,7 @@ from datashare_python.dependencies import (
|
|
|
19
21
|
lifespan_es_client,
|
|
20
22
|
lifespan_task_client,
|
|
21
23
|
set_es_client,
|
|
22
|
-
set_event_loop,
|
|
23
|
-
set_loggers,
|
|
24
24
|
set_task_client,
|
|
25
|
-
set_temporal_client,
|
|
26
25
|
with_dependencies,
|
|
27
26
|
)
|
|
28
27
|
from datashare_python.objects import Document, TaskState
|
|
@@ -78,13 +77,7 @@ class MockedWorkflow:
|
|
|
78
77
|
|
|
79
78
|
@pytest.fixture(scope="session")
|
|
80
79
|
def test_deps() -> list[ContextManagerFactory]:
|
|
81
|
-
return [
|
|
82
|
-
set_loggers,
|
|
83
|
-
set_event_loop,
|
|
84
|
-
set_es_client,
|
|
85
|
-
set_temporal_client,
|
|
86
|
-
set_task_client,
|
|
87
|
-
]
|
|
80
|
+
return [set_es_client, set_task_client]
|
|
88
81
|
|
|
89
82
|
|
|
90
83
|
@pytest.fixture(scope="session")
|
|
@@ -99,8 +92,16 @@ def event_loop(
|
|
|
99
92
|
|
|
100
93
|
@pytest.fixture(scope="session")
|
|
101
94
|
def test_worker_config() -> WorkerConfig:
|
|
95
|
+
logging_config = LoggingConfig(
|
|
96
|
+
log_in_json=False,
|
|
97
|
+
loggers={
|
|
98
|
+
"datashare_python": "DEBUG",
|
|
99
|
+
"icij_common": "DEBUG",
|
|
100
|
+
"worker_template": "DEBUG",
|
|
101
|
+
},
|
|
102
|
+
)
|
|
102
103
|
return WorkerConfig(
|
|
103
|
-
|
|
104
|
+
logging=logging_config,
|
|
104
105
|
datashare=DatashareClientConfig(url="http://localhost:8080"),
|
|
105
106
|
temporal=TemporalClientConfig(host="localhost:7233"),
|
|
106
107
|
)
|
|
@@ -9,8 +9,9 @@ from typing import Any
|
|
|
9
9
|
|
|
10
10
|
from icij_common.es import ESClient
|
|
11
11
|
|
|
12
|
-
from .config import WorkerConfig
|
|
12
|
+
from .config import LogLevel, WorkerConfig
|
|
13
13
|
from .exceptions import DependencyInjectionError
|
|
14
|
+
from .logging_ import setup_worker_loggers
|
|
14
15
|
from .task_client import DatashareTaskClient
|
|
15
16
|
from .types_ import ContextManagerFactory, TemporalClient
|
|
16
17
|
|
|
@@ -35,10 +36,13 @@ def lifespan_event_loop() -> AbstractEventLoop:
|
|
|
35
36
|
raise DependencyInjectionError("event loop") from e
|
|
36
37
|
|
|
37
38
|
|
|
38
|
-
def set_loggers(
|
|
39
|
-
worker_config
|
|
39
|
+
def set_loggers(
|
|
40
|
+
worker_config: WorkerConfig, worker_id: str, loggers: dict[str, LogLevel]
|
|
41
|
+
) -> None:
|
|
42
|
+
setup_worker_loggers(
|
|
43
|
+
loggers=loggers, worker_id=worker_id, in_json=worker_config.logging.log_in_json
|
|
44
|
+
)
|
|
40
45
|
logger.info("worker loggers ready to log 💬")
|
|
41
|
-
logger.info("app config: %s", worker_config.model_dump_json(indent=2))
|
|
42
46
|
|
|
43
47
|
|
|
44
48
|
def set_worker_config(worker_config: WorkerConfig) -> None:
|
|
@@ -4,7 +4,7 @@ from collections.abc import Callable, Iterable
|
|
|
4
4
|
from importlib.metadata import entry_points
|
|
5
5
|
|
|
6
6
|
from .config import WorkerConfig
|
|
7
|
-
from .dependencies import set_worker_config
|
|
7
|
+
from .dependencies import set_loggers, set_worker_config
|
|
8
8
|
from .types_ import ContextManagerFactory
|
|
9
9
|
from .utils import ActivityWithProgress
|
|
10
10
|
|
|
@@ -29,6 +29,8 @@ _Discovery = tuple[
|
|
|
29
29
|
type[WorkerConfig],
|
|
30
30
|
]
|
|
31
31
|
|
|
32
|
+
_MANDATORY_DEPS = [set_worker_config, set_loggers]
|
|
33
|
+
|
|
32
34
|
|
|
33
35
|
def discover(
|
|
34
36
|
wf_names: list[str] | None, *, act_names: list[str] | None, deps_name: str | None
|
|
@@ -68,8 +70,9 @@ def discover(
|
|
|
68
70
|
deps = []
|
|
69
71
|
if deps_name is not None:
|
|
70
72
|
deps = discover_dependencies(deps_name)
|
|
71
|
-
|
|
72
|
-
deps
|
|
73
|
+
for mandatory in _MANDATORY_DEPS:
|
|
74
|
+
if mandatory not in deps:
|
|
75
|
+
deps.append(mandatory)
|
|
73
76
|
if deps:
|
|
74
77
|
n_deps = len(deps)
|
|
75
78
|
discovered += "\n"
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import secrets
|
|
2
|
+
from collections.abc import Generator, Mapping
|
|
3
|
+
from contextlib import contextmanager
|
|
4
|
+
from contextvars import ContextVar
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from typing import Annotated, Any, NoReturn, Self, TypeVar
|
|
7
|
+
|
|
8
|
+
from nexusrpc import InputT, OutputT
|
|
9
|
+
from pydantic import Field
|
|
10
|
+
from temporalio.api.common.v1 import Payload
|
|
11
|
+
from temporalio.converter import DataConverter
|
|
12
|
+
from temporalio.worker import (
|
|
13
|
+
ActivityInboundInterceptor,
|
|
14
|
+
ContinueAsNewInput,
|
|
15
|
+
ExecuteActivityInput,
|
|
16
|
+
ExecuteWorkflowInput,
|
|
17
|
+
HandleQueryInput,
|
|
18
|
+
HandleSignalInput,
|
|
19
|
+
Interceptor,
|
|
20
|
+
SignalChildWorkflowInput,
|
|
21
|
+
SignalExternalWorkflowInput,
|
|
22
|
+
StartActivityInput,
|
|
23
|
+
StartChildWorkflowInput,
|
|
24
|
+
StartLocalActivityInput,
|
|
25
|
+
StartNexusOperationInput,
|
|
26
|
+
WorkflowInboundInterceptor,
|
|
27
|
+
WorkflowInterceptorClassInput,
|
|
28
|
+
WorkflowOutboundInterceptor,
|
|
29
|
+
)
|
|
30
|
+
from temporalio.workflow import (
|
|
31
|
+
ActivityHandle,
|
|
32
|
+
ChildWorkflowHandle,
|
|
33
|
+
NexusOperationHandle,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
from .objects import BaseModel
|
|
37
|
+
|
|
38
|
+
_TRACEPARENT = "traceparent"
|
|
39
|
+
_DEFAULT_PAYLOAD_CONVERTER = DataConverter.default.payload_converter
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class TraceContext(BaseModel):
|
|
43
|
+
# https://www.w3.org/TR/trace-context/
|
|
44
|
+
version: Annotated[str, Field(frozen=True)] = "00"
|
|
45
|
+
trace_id: str
|
|
46
|
+
parent_id: str
|
|
47
|
+
sampled: bool = True
|
|
48
|
+
|
|
49
|
+
def __hash__(self) -> int:
|
|
50
|
+
return hash((self.trace_id, self.parent_id, self.sampled))
|
|
51
|
+
|
|
52
|
+
@classmethod
|
|
53
|
+
def next_span(cls, parent: Self | None) -> Self:
|
|
54
|
+
new_span_id = secrets.token_hex(8)
|
|
55
|
+
if parent is None:
|
|
56
|
+
trace_id = secrets.token_hex(16)
|
|
57
|
+
return TraceContext(trace_id=trace_id, parent_id=new_span_id)
|
|
58
|
+
return TraceContext(
|
|
59
|
+
trace_id=parent.trace_id, parent_id=new_span_id, sampled=parent.sampled
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
@property
|
|
63
|
+
def traceparent(self) -> str:
|
|
64
|
+
flags = "01" if self.sampled else "00"
|
|
65
|
+
return f"{self.version}-{self.trace_id}-{self.parent_id}-{flags}"
|
|
66
|
+
|
|
67
|
+
@classmethod
|
|
68
|
+
def from_traceparent(cls, traceparent: str) -> Self:
|
|
69
|
+
split = traceparent.split("-")
|
|
70
|
+
if len(split) != 4:
|
|
71
|
+
raise ValueError(f"invalid trace parent: {traceparent}")
|
|
72
|
+
version, trace_id, parent_id, flags = split
|
|
73
|
+
if version != "00":
|
|
74
|
+
msg = (
|
|
75
|
+
f"unsupported trace parent version {version} "
|
|
76
|
+
f"for traceparent {traceparent}"
|
|
77
|
+
)
|
|
78
|
+
raise ValueError(msg)
|
|
79
|
+
sampled = flags == "01"
|
|
80
|
+
return cls(trace_id=trace_id, parent_id=parent_id, sampled=sampled)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
_TRACE_CONTEXT: ContextVar[TraceContext | None] = ContextVar(
|
|
84
|
+
"trace_context", default=None
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class TraceContextInterceptor(Interceptor):
|
|
89
|
+
def workflow_interceptor_class(
|
|
90
|
+
self,
|
|
91
|
+
input: WorkflowInterceptorClassInput, # noqa: A002, ARG002
|
|
92
|
+
) -> type[WorkflowInboundInterceptor] | None:
|
|
93
|
+
return _TraceContextWorkflowInboundInterceptor
|
|
94
|
+
|
|
95
|
+
def intercept_activity(
|
|
96
|
+
self,
|
|
97
|
+
next: ActivityInboundInterceptor, # noqa: A002
|
|
98
|
+
) -> ActivityInboundInterceptor:
|
|
99
|
+
return _TraceContextActivityInboundInterceptor(next)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class _TraceContextWorkflowInboundInterceptor(WorkflowInboundInterceptor):
|
|
103
|
+
def init(self, outbound: WorkflowOutboundInterceptor) -> None:
|
|
104
|
+
with_outbound_trace_ctx = _TraceContextWorkflowOutboundInterceptor(outbound)
|
|
105
|
+
super().init(with_outbound_trace_ctx)
|
|
106
|
+
|
|
107
|
+
async def execute_workflow(self, input: ExecuteWorkflowInput) -> Any: # noqa: A002
|
|
108
|
+
with _trace_context(input.headers):
|
|
109
|
+
return await super().execute_workflow(input)
|
|
110
|
+
|
|
111
|
+
async def handle_signal(self, input: HandleSignalInput) -> None: # noqa: A002
|
|
112
|
+
with _trace_context(input.headers):
|
|
113
|
+
return await super().handle_signal(input)
|
|
114
|
+
|
|
115
|
+
async def handle_query(self, input: HandleQueryInput) -> Any: # noqa: A002
|
|
116
|
+
with _trace_context(input.headers):
|
|
117
|
+
return await super().handle_query(input)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class _TraceContextWorkflowOutboundInterceptor(WorkflowOutboundInterceptor):
|
|
121
|
+
def continue_as_new(self, input: ContinueAsNewInput) -> NoReturn: # noqa: A002
|
|
122
|
+
super().continue_as_new(_with_trace_context_header(input))
|
|
123
|
+
|
|
124
|
+
async def signal_child_workflow(self, input: SignalChildWorkflowInput) -> None: # noqa: A002
|
|
125
|
+
return await super().signal_child_workflow(_with_trace_context_header(input))
|
|
126
|
+
|
|
127
|
+
async def signal_external_workflow(
|
|
128
|
+
self,
|
|
129
|
+
input: SignalExternalWorkflowInput, # noqa: A002
|
|
130
|
+
) -> None:
|
|
131
|
+
return await super().signal_external_workflow(_with_trace_context_header(input))
|
|
132
|
+
|
|
133
|
+
def start_activity(self, input: StartActivityInput) -> ActivityHandle[Any]: # noqa: A002
|
|
134
|
+
return super().start_activity(_with_trace_context_header(input))
|
|
135
|
+
|
|
136
|
+
async def start_child_workflow(
|
|
137
|
+
self,
|
|
138
|
+
input: StartChildWorkflowInput, # noqa: A002
|
|
139
|
+
) -> ChildWorkflowHandle[Any, Any]:
|
|
140
|
+
return await super().start_child_workflow(_with_trace_context_header(input))
|
|
141
|
+
|
|
142
|
+
def start_local_activity(
|
|
143
|
+
self,
|
|
144
|
+
input: StartLocalActivityInput, # noqa: A002
|
|
145
|
+
) -> ActivityHandle[Any]:
|
|
146
|
+
return super().start_local_activity(_with_trace_context_header(input))
|
|
147
|
+
|
|
148
|
+
async def start_nexus_operation(
|
|
149
|
+
self,
|
|
150
|
+
input: StartNexusOperationInput[InputT, OutputT], # noqa: A002
|
|
151
|
+
) -> NexusOperationHandle[OutputT]:
|
|
152
|
+
return await super().start_nexus_operation(_with_trace_context_header(input))
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class _TraceContextActivityInboundInterceptor(ActivityInboundInterceptor):
|
|
156
|
+
async def execute_activity(self, input: ExecuteActivityInput) -> Any: # noqa: A002
|
|
157
|
+
with _trace_context(input.headers):
|
|
158
|
+
return await super().execute_activity(input)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def get_trace_context() -> TraceContext | None:
|
|
162
|
+
return _TRACE_CONTEXT.get()
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@contextmanager
|
|
166
|
+
def _trace_context(headers: Mapping[str, Payload]) -> Generator[None, None, None]:
|
|
167
|
+
ctx = headers.get(_TRACEPARENT)
|
|
168
|
+
if ctx is not None:
|
|
169
|
+
ctx = _DEFAULT_PAYLOAD_CONVERTER.from_payloads(
|
|
170
|
+
[headers.get(_TRACEPARENT)], None
|
|
171
|
+
)[0]
|
|
172
|
+
ctx = TraceContext.from_traceparent(ctx)
|
|
173
|
+
else:
|
|
174
|
+
ctx = TraceContext.next_span(None)
|
|
175
|
+
tok = None
|
|
176
|
+
try:
|
|
177
|
+
tok = _TRACE_CONTEXT.set(ctx)
|
|
178
|
+
yield
|
|
179
|
+
finally:
|
|
180
|
+
if tok is not None:
|
|
181
|
+
_TRACE_CONTEXT.reset(tok)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
InputWithHeaders = TypeVar("InputWithHeaders")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _with_trace_context_header(
|
|
188
|
+
input_with_headers: InputWithHeaders,
|
|
189
|
+
) -> InputWithHeaders:
|
|
190
|
+
ctx = get_trace_context()
|
|
191
|
+
if ctx is None:
|
|
192
|
+
return input_with_headers
|
|
193
|
+
new_obj = deepcopy(input_with_headers)
|
|
194
|
+
next_ctx = TraceContext.next_span(ctx)
|
|
195
|
+
new_obj.headers[_TRACEPARENT] = _DEFAULT_PAYLOAD_CONVERTER.to_payload(
|
|
196
|
+
next_ctx.traceparent
|
|
197
|
+
)
|
|
198
|
+
return new_obj
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from copy import copy
|
|
4
|
+
|
|
5
|
+
from icij_common.logging_utils import (
|
|
6
|
+
DATE_FMT,
|
|
7
|
+
STREAM_HANDLER_FMT,
|
|
8
|
+
STREAM_HANDLER_FMT_WITH_WORKER_ID,
|
|
9
|
+
)
|
|
10
|
+
from pythonjsonlogger.core import RESERVED_ATTRS, BaseJsonFormatter
|
|
11
|
+
from pythonjsonlogger.orjson import OrjsonFormatter
|
|
12
|
+
from temporalio import activity, workflow
|
|
13
|
+
|
|
14
|
+
from .config import LogLevel
|
|
15
|
+
from .interceptors import get_trace_context
|
|
16
|
+
|
|
17
|
+
_ACT_LOGGER_ATTRS = ["activity_type", "activity_id", "activity_run_id"]
|
|
18
|
+
_WF_LOGGED_ATTRS = ["workflow_type", "workflow_id", "workflow_run_id"]
|
|
19
|
+
_TRACE_CONTEXT_ATTRS = ["trace_id", "parent_id", "traceparent"]
|
|
20
|
+
_LOGGED_ATTRIBUTES = (
|
|
21
|
+
copy(RESERVED_ATTRS)
|
|
22
|
+
+ _WF_LOGGED_ATTRS
|
|
23
|
+
+ _ACT_LOGGER_ATTRS
|
|
24
|
+
+ _TRACE_CONTEXT_ATTRS
|
|
25
|
+
+ ["worker_id"]
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def setup_worker_loggers(
|
|
30
|
+
loggers: dict[str, LogLevel], *, worker_id: str | None, in_json: bool
|
|
31
|
+
) -> None:
|
|
32
|
+
worker_filter = WorkerFilter(worker_id)
|
|
33
|
+
for logger_name, level_str in loggers.items():
|
|
34
|
+
level = getattr(logging, level_str)
|
|
35
|
+
logger = logging.getLogger(logger_name)
|
|
36
|
+
logger.setLevel(level)
|
|
37
|
+
logger.handlers = []
|
|
38
|
+
for handler in _get_worker_handlers(level, worker_id, in_json=in_json):
|
|
39
|
+
logger.addHandler(handler)
|
|
40
|
+
logger.addFilter(worker_filter)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _get_worker_handlers(
|
|
44
|
+
level: int, worker_id: str | None, *, in_json: bool
|
|
45
|
+
) -> list[logging.Handler]:
|
|
46
|
+
stream_handler = logging.StreamHandler(sys.stderr)
|
|
47
|
+
if in_json:
|
|
48
|
+
fmt = _json_formatter(datefmt=DATE_FMT)
|
|
49
|
+
else:
|
|
50
|
+
if worker_id is not None:
|
|
51
|
+
fmt = STREAM_HANDLER_FMT_WITH_WORKER_ID
|
|
52
|
+
else:
|
|
53
|
+
fmt = STREAM_HANDLER_FMT
|
|
54
|
+
fmt = logging.Formatter(fmt, DATE_FMT)
|
|
55
|
+
stream_handler.setFormatter(fmt)
|
|
56
|
+
stream_handler.setLevel(level)
|
|
57
|
+
return [stream_handler]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class WorkerFilter(logging.Filter):
|
|
61
|
+
def __init__(self, worker_id: str) -> None:
|
|
62
|
+
super().__init__()
|
|
63
|
+
self._worker_id = worker_id
|
|
64
|
+
|
|
65
|
+
def filter(self, record: logging.LogRecord) -> bool:
|
|
66
|
+
record.worker_id = self._worker_id
|
|
67
|
+
if workflow.in_workflow():
|
|
68
|
+
wf_info = workflow.info()
|
|
69
|
+
for attr in _WF_LOGGED_ATTRS:
|
|
70
|
+
setattr(record, attr, getattr(wf_info, attr))
|
|
71
|
+
if activity.in_activity():
|
|
72
|
+
act_info = activity.info()
|
|
73
|
+
for attr in _ACT_LOGGER_ATTRS:
|
|
74
|
+
setattr(record, attr, getattr(act_info, attr))
|
|
75
|
+
trace_context = get_trace_context()
|
|
76
|
+
if trace_context is not None:
|
|
77
|
+
for attr in _TRACE_CONTEXT_ATTRS:
|
|
78
|
+
setattr(record, attr, getattr(trace_context, attr))
|
|
79
|
+
return True
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _json_formatter(datefmt: str) -> BaseJsonFormatter:
|
|
83
|
+
fmt = OrjsonFormatter( # let's keep logging as fast as possible
|
|
84
|
+
_LOGGED_ATTRIBUTES, datefmt=datefmt
|
|
85
|
+
)
|
|
86
|
+
return fmt
|
|
@@ -338,6 +338,8 @@ def activity_defn(
|
|
|
338
338
|
retriables: set[type[Exception]] = None,
|
|
339
339
|
) -> Callable[[Callable[P, T]], Callable[P, T]]:
|
|
340
340
|
def decorator(activity_fn: Callable[P, T]) -> Callable[P, T]:
|
|
341
|
+
# TODO: some of these could probably be reimplemented more elegantly using
|
|
342
|
+
# temporal interceptors: https://docs.temporal.io/develop/python/workers/interceptors
|
|
341
343
|
activity_fn = positional_args_only(activity_fn)
|
|
342
344
|
activity_fn = with_retriables(retriables)(activity_fn)
|
|
343
345
|
if supports_progress(activity_fn):
|
|
Binary file
|
|
@@ -8,6 +8,7 @@ from asyncio import AbstractEventLoop
|
|
|
8
8
|
from collections.abc import AsyncGenerator, Callable
|
|
9
9
|
from concurrent.futures import ThreadPoolExecutor
|
|
10
10
|
from contextlib import asynccontextmanager
|
|
11
|
+
from copy import copy
|
|
11
12
|
from typing import Any
|
|
12
13
|
|
|
13
14
|
from temporalio.worker import PollerBehaviorSimpleMaximum, Worker
|
|
@@ -15,6 +16,7 @@ from temporalio.worker import PollerBehaviorSimpleMaximum, Worker
|
|
|
15
16
|
from .config import WorkerConfig
|
|
16
17
|
from .dependencies import with_dependencies
|
|
17
18
|
from .discovery import Activity
|
|
19
|
+
from .interceptors import TraceContextInterceptor
|
|
18
20
|
from .types_ import ContextManagerFactory, TemporalClient
|
|
19
21
|
|
|
20
22
|
logger = logging.getLogger(__name__)
|
|
@@ -83,9 +85,10 @@ def datashare_worker(
|
|
|
83
85
|
max_concurrent_activities = 1
|
|
84
86
|
if workflows:
|
|
85
87
|
logger.warning(_SEPARATE_IO_AND_CPU_WORKERS)
|
|
86
|
-
|
|
88
|
+
interceptors = [TraceContextInterceptor()]
|
|
87
89
|
return DatashareWorker(
|
|
88
90
|
client,
|
|
91
|
+
interceptors=interceptors,
|
|
89
92
|
identity=worker_id,
|
|
90
93
|
workflows=workflows,
|
|
91
94
|
activities=activities,
|
|
@@ -142,12 +145,27 @@ async def worker_context(
|
|
|
142
145
|
task_queue: str,
|
|
143
146
|
dependencies: list[ContextManagerFactory] | None = None,
|
|
144
147
|
) -> AsyncGenerator[DatashareWorker, None]:
|
|
148
|
+
discovered = []
|
|
149
|
+
if activities is not None:
|
|
150
|
+
discovered.extend(activities)
|
|
151
|
+
if workflows is not None:
|
|
152
|
+
discovered.extend(workflows)
|
|
153
|
+
if dependencies is not None:
|
|
154
|
+
discovered.extend(dependencies)
|
|
155
|
+
discovered.append(worker_config)
|
|
156
|
+
loggers = copy(worker_config.logging.loggers)
|
|
157
|
+
discovered_loggers = {_get_object_package(o).__name__ for o in discovered}
|
|
158
|
+
for logger in discovered_loggers:
|
|
159
|
+
if logger not in loggers:
|
|
160
|
+
# Log in info by default
|
|
161
|
+
loggers[logger] = "INFO"
|
|
145
162
|
deps_cm = (
|
|
146
163
|
with_dependencies(
|
|
147
164
|
dependencies,
|
|
148
165
|
worker_config=worker_config,
|
|
149
166
|
worker_id=worker_id,
|
|
150
167
|
event_loop=event_loop,
|
|
168
|
+
loggers=loggers,
|
|
151
169
|
)
|
|
152
170
|
if dependencies
|
|
153
171
|
else _do_nothing_cm()
|
|
@@ -181,3 +199,9 @@ def _get_class_from_method(method: Callable) -> type:
|
|
|
181
199
|
class_name = method.__qualname__.rsplit(".", 1)[0]
|
|
182
200
|
module = sys.modules[method.__module__]
|
|
183
201
|
return getattr(module, class_name)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _get_object_package(obj: Any) -> Any:
|
|
205
|
+
mod = inspect.getmodule(obj)
|
|
206
|
+
base, _, _ = mod.__name__.partition(".")
|
|
207
|
+
return sys.modules[base]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "datashare-python"
|
|
3
|
-
version = "0.6.3"
|
|
3
|
+
version = "0.7.1"
|
|
4
4
|
description = "Manage Pythoœn tasks and local resources in Datashare"
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Clément Doumouro", email = "cdoumouro@icij.org" },
|
|
@@ -10,16 +10,17 @@ authors = [
|
|
|
10
10
|
readme = "README.md"
|
|
11
11
|
requires-python = ">=3.11,<4"
|
|
12
12
|
dependencies = [
|
|
13
|
-
"alive-progress~=3.2
|
|
14
|
-
"aiohttp~=3.11
|
|
13
|
+
"alive-progress~=3.2",
|
|
14
|
+
"aiohttp~=3.11",
|
|
15
15
|
"icij-common[elasticsearch]~=0.8.2",
|
|
16
|
-
"python-json-logger~=4.0
|
|
17
|
-
"nest-asyncio~=1.6
|
|
18
|
-
"temporalio~=1.23
|
|
16
|
+
"python-json-logger~=4.0",
|
|
17
|
+
"nest-asyncio~=1.6",
|
|
18
|
+
"temporalio~=1.23",
|
|
19
19
|
"typer~=0.15.4",
|
|
20
20
|
"tomlkit~=0.14.0",
|
|
21
|
-
"hatchling~=1.27
|
|
21
|
+
"hatchling~=1.27",
|
|
22
22
|
"pyyaml~=6.0",
|
|
23
|
+
"orjson~=3.11",
|
|
23
24
|
]
|
|
24
25
|
|
|
25
26
|
[project.urls]
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|