datashare-python 0.6.3__tar.gz → 0.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {datashare_python-0.6.3 → datashare_python-0.7.1}/PKG-INFO +8 -7
  2. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/config.py +13 -5
  3. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/conftest.py +12 -11
  4. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/dependencies.py +8 -4
  5. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/discovery.py +6 -3
  6. datashare_python-0.7.1/datashare_python/interceptors.py +198 -0
  7. datashare_python-0.7.1/datashare_python/logging_.py +86 -0
  8. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/utils.py +2 -0
  9. datashare_python-0.7.1/datashare_python/worker-template.tar.gz +0 -0
  10. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/worker.py +25 -1
  11. {datashare_python-0.6.3 → datashare_python-0.7.1}/pyproject.toml +8 -7
  12. datashare_python-0.6.3/datashare_python/worker-template.tar.gz +0 -0
  13. {datashare_python-0.6.3 → datashare_python-0.7.1}/.gitignore +0 -0
  14. {datashare_python-0.6.3 → datashare_python-0.7.1}/README.md +0 -0
  15. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/.gitignore +0 -0
  16. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/__init__.py +0 -0
  17. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/__main__.py +0 -0
  18. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/__init__.py +0 -0
  19. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/project.py +0 -0
  20. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/task.py +0 -0
  21. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/utils.py +0 -0
  22. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/cli/worker.py +0 -0
  23. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/constants.py +0 -0
  24. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/exceptions.py +0 -0
  25. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/objects.py +0 -0
  26. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/task_client.py +0 -0
  27. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/template.py +0 -0
  28. {datashare_python-0.6.3 → datashare_python-0.7.1}/datashare_python/types_.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datashare-python
3
- Version: 0.6.3
3
+ Version: 0.7.1
4
4
  Summary: Manage Python tasks and local resources in Datashare
5
5
  Project-URL: Homepage, https://icij.github.io/datashare-python/
6
6
  Project-URL: Documentation, https://icij.github.io/datashare-python/
@@ -8,13 +8,14 @@ Project-URL: Repository, https://github.com/ICIJ/datashare-python
8
8
  Project-URL: Issues, https://github.com/ICIJ/datashare-python/issues
9
9
  Author-email: Clément Doumouro <cdoumouro@icij.org>, Clément Doumouro <clement.doumouro@gmail.com>, Lion Summerbell <lsummerbell@icij.org>
10
10
  Requires-Python: <4,>=3.11
11
- Requires-Dist: aiohttp~=3.11.9
12
- Requires-Dist: alive-progress~=3.2.0
13
- Requires-Dist: hatchling~=1.27.0
11
+ Requires-Dist: aiohttp~=3.11
12
+ Requires-Dist: alive-progress~=3.2
13
+ Requires-Dist: hatchling~=1.27
14
14
  Requires-Dist: icij-common[elasticsearch]~=0.8.2
15
- Requires-Dist: nest-asyncio~=1.6.0
16
- Requires-Dist: python-json-logger~=4.0.0
15
+ Requires-Dist: nest-asyncio~=1.6
16
+ Requires-Dist: orjson~=3.11
17
+ Requires-Dist: python-json-logger~=4.0
17
18
  Requires-Dist: pyyaml~=6.0
18
- Requires-Dist: temporalio~=1.23.0
19
+ Requires-Dist: temporalio~=1.23
19
20
  Requires-Dist: tomlkit~=0.14.0
20
21
  Requires-Dist: typer~=0.15.4
@@ -1,5 +1,5 @@
1
1
  from pathlib import Path
2
- from typing import ClassVar
2
+ from typing import Annotated, Literal
3
3
 
4
4
  from icij_common.es import ESClient
5
5
  from icij_common.pydantic_utils import ICIJSettings
@@ -18,7 +18,6 @@ import datashare_python
18
18
  from .objects import BaseModel
19
19
  from .task_client import DatashareTaskClient
20
20
  from .types_ import TemporalClient
21
- from .utils import LogWithWorkerIDMixin
22
21
 
23
22
  _ALL_LOGGERS = [datashare_python.__name__]
24
23
 
@@ -76,11 +75,20 @@ class TemporalClientConfig(BaseModel):
76
75
  return self._client
77
76
 
78
77
 
79
- class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
78
+ LogLevel = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
79
+
80
+
81
+ class LoggingConfig(BaseModel):
82
+ log_in_json: bool = False
83
+ loggers: dict[str, LogLevel]
84
+
85
+
86
+ class WorkerConfig(ICIJSettings, BaseModel):
80
87
  model_config = DS_WORKER_SETTINGS_CONFIG
81
88
 
82
- loggers: ClassVar[list[str]] = Field(_ALL_LOGGERS, frozen=True)
83
- log_level: str = Field(default="INFO")
89
+ logging: Annotated[LoggingConfig, Field(frozen=True)] = {
90
+ datashare_python.__name__: "INFO"
91
+ }
84
92
 
85
93
  datashare: DatashareClientConfig = DatashareClientConfig()
86
94
  elasticsearch: ESClientConfig = ESClientConfig()
@@ -8,10 +8,12 @@ import nest_asyncio
8
8
  import pytest
9
9
  from elasticsearch._async.helpers import async_streaming_bulk
10
10
  from icij_common.es import DOC_ROOT_ID, ES_DOCUMENT_TYPE, ID, ESClient
11
+ from icij_common.test_utils import reset_env # noqa: F401
11
12
  from temporalio import workflow
12
13
 
13
14
  from datashare_python.config import (
14
15
  DatashareClientConfig,
16
+ LoggingConfig,
15
17
  TemporalClientConfig,
16
18
  WorkerConfig,
17
19
  )
@@ -19,10 +21,7 @@ from datashare_python.dependencies import (
19
21
  lifespan_es_client,
20
22
  lifespan_task_client,
21
23
  set_es_client,
22
- set_event_loop,
23
- set_loggers,
24
24
  set_task_client,
25
- set_temporal_client,
26
25
  with_dependencies,
27
26
  )
28
27
  from datashare_python.objects import Document, TaskState
@@ -78,13 +77,7 @@ class MockedWorkflow:
78
77
 
79
78
  @pytest.fixture(scope="session")
80
79
  def test_deps() -> list[ContextManagerFactory]:
81
- return [
82
- set_loggers,
83
- set_event_loop,
84
- set_es_client,
85
- set_temporal_client,
86
- set_task_client,
87
- ]
80
+ return [set_es_client, set_task_client]
88
81
 
89
82
 
90
83
  @pytest.fixture(scope="session")
@@ -99,8 +92,16 @@ def event_loop(
99
92
 
100
93
  @pytest.fixture(scope="session")
101
94
  def test_worker_config() -> WorkerConfig:
95
+ logging_config = LoggingConfig(
96
+ log_in_json=False,
97
+ loggers={
98
+ "datashare_python": "DEBUG",
99
+ "icij_common": "DEBUG",
100
+ "worker_template": "DEBUG",
101
+ },
102
+ )
102
103
  return WorkerConfig(
103
- log_level="DEBUG",
104
+ logging=logging_config,
104
105
  datashare=DatashareClientConfig(url="http://localhost:8080"),
105
106
  temporal=TemporalClientConfig(host="localhost:7233"),
106
107
  )
@@ -9,8 +9,9 @@ from typing import Any
9
9
 
10
10
  from icij_common.es import ESClient
11
11
 
12
- from .config import WorkerConfig
12
+ from .config import LogLevel, WorkerConfig
13
13
  from .exceptions import DependencyInjectionError
14
+ from .logging_ import setup_worker_loggers
14
15
  from .task_client import DatashareTaskClient
15
16
  from .types_ import ContextManagerFactory, TemporalClient
16
17
 
@@ -35,10 +36,13 @@ def lifespan_event_loop() -> AbstractEventLoop:
35
36
  raise DependencyInjectionError("event loop") from e
36
37
 
37
38
 
38
- def set_loggers(worker_config: WorkerConfig, worker_id: str) -> None:
39
- worker_config.setup_loggers(worker_id=worker_id)
39
+ def set_loggers(
40
+ worker_config: WorkerConfig, worker_id: str, loggers: dict[str, LogLevel]
41
+ ) -> None:
42
+ setup_worker_loggers(
43
+ loggers=loggers, worker_id=worker_id, in_json=worker_config.logging.log_in_json
44
+ )
40
45
  logger.info("worker loggers ready to log 💬")
41
- logger.info("app config: %s", worker_config.model_dump_json(indent=2))
42
46
 
43
47
 
44
48
  def set_worker_config(worker_config: WorkerConfig) -> None:
@@ -4,7 +4,7 @@ from collections.abc import Callable, Iterable
4
4
  from importlib.metadata import entry_points
5
5
 
6
6
  from .config import WorkerConfig
7
- from .dependencies import set_worker_config
7
+ from .dependencies import set_loggers, set_worker_config
8
8
  from .types_ import ContextManagerFactory
9
9
  from .utils import ActivityWithProgress
10
10
 
@@ -29,6 +29,8 @@ _Discovery = tuple[
29
29
  type[WorkerConfig],
30
30
  ]
31
31
 
32
+ _MANDATORY_DEPS = [set_worker_config, set_loggers]
33
+
32
34
 
33
35
  def discover(
34
36
  wf_names: list[str] | None, *, act_names: list[str] | None, deps_name: str | None
@@ -68,8 +70,9 @@ def discover(
68
70
  deps = []
69
71
  if deps_name is not None:
70
72
  deps = discover_dependencies(deps_name)
71
- if set_worker_config not in deps:
72
- deps.append(set_worker_config)
73
+ for mandatory in _MANDATORY_DEPS:
74
+ if mandatory not in deps:
75
+ deps.append(mandatory)
73
76
  if deps:
74
77
  n_deps = len(deps)
75
78
  discovered += "\n"
@@ -0,0 +1,198 @@
1
+ import secrets
2
+ from collections.abc import Generator, Mapping
3
+ from contextlib import contextmanager
4
+ from contextvars import ContextVar
5
+ from copy import deepcopy
6
+ from typing import Annotated, Any, NoReturn, Self, TypeVar
7
+
8
+ from nexusrpc import InputT, OutputT
9
+ from pydantic import Field
10
+ from temporalio.api.common.v1 import Payload
11
+ from temporalio.converter import DataConverter
12
+ from temporalio.worker import (
13
+ ActivityInboundInterceptor,
14
+ ContinueAsNewInput,
15
+ ExecuteActivityInput,
16
+ ExecuteWorkflowInput,
17
+ HandleQueryInput,
18
+ HandleSignalInput,
19
+ Interceptor,
20
+ SignalChildWorkflowInput,
21
+ SignalExternalWorkflowInput,
22
+ StartActivityInput,
23
+ StartChildWorkflowInput,
24
+ StartLocalActivityInput,
25
+ StartNexusOperationInput,
26
+ WorkflowInboundInterceptor,
27
+ WorkflowInterceptorClassInput,
28
+ WorkflowOutboundInterceptor,
29
+ )
30
+ from temporalio.workflow import (
31
+ ActivityHandle,
32
+ ChildWorkflowHandle,
33
+ NexusOperationHandle,
34
+ )
35
+
36
+ from .objects import BaseModel
37
+
38
+ _TRACEPARENT = "traceparent"
39
+ _DEFAULT_PAYLOAD_CONVERTER = DataConverter.default.payload_converter
40
+
41
+
42
+ class TraceContext(BaseModel):
43
+ # https://www.w3.org/TR/trace-context/
44
+ version: Annotated[str, Field(frozen=True)] = "00"
45
+ trace_id: str
46
+ parent_id: str
47
+ sampled: bool = True
48
+
49
+ def __hash__(self) -> int:
50
+ return hash((self.trace_id, self.parent_id, self.sampled))
51
+
52
+ @classmethod
53
+ def next_span(cls, parent: Self | None) -> Self:
54
+ new_span_id = secrets.token_hex(8)
55
+ if parent is None:
56
+ trace_id = secrets.token_hex(16)
57
+ return TraceContext(trace_id=trace_id, parent_id=new_span_id)
58
+ return TraceContext(
59
+ trace_id=parent.trace_id, parent_id=new_span_id, sampled=parent.sampled
60
+ )
61
+
62
+ @property
63
+ def traceparent(self) -> str:
64
+ flags = "01" if self.sampled else "00"
65
+ return f"{self.version}-{self.trace_id}-{self.parent_id}-{flags}"
66
+
67
+ @classmethod
68
+ def from_traceparent(cls, traceparent: str) -> Self:
69
+ split = traceparent.split("-")
70
+ if len(split) != 4:
71
+ raise ValueError(f"invalid trace parent: {traceparent}")
72
+ version, trace_id, parent_id, flags = split
73
+ if version != "00":
74
+ msg = (
75
+ f"unsupported trace parent version {version} "
76
+ f"for traceparent {traceparent}"
77
+ )
78
+ raise ValueError(msg)
79
+ sampled = flags == "01"
80
+ return cls(trace_id=trace_id, parent_id=parent_id, sampled=sampled)
81
+
82
+
83
+ _TRACE_CONTEXT: ContextVar[TraceContext | None] = ContextVar(
84
+ "trace_context", default=None
85
+ )
86
+
87
+
88
+ class TraceContextInterceptor(Interceptor):
89
+ def workflow_interceptor_class(
90
+ self,
91
+ input: WorkflowInterceptorClassInput, # noqa: A002, ARG002
92
+ ) -> type[WorkflowInboundInterceptor] | None:
93
+ return _TraceContextWorkflowInboundInterceptor
94
+
95
+ def intercept_activity(
96
+ self,
97
+ next: ActivityInboundInterceptor, # noqa: A002
98
+ ) -> ActivityInboundInterceptor:
99
+ return _TraceContextActivityInboundInterceptor(next)
100
+
101
+
102
+ class _TraceContextWorkflowInboundInterceptor(WorkflowInboundInterceptor):
103
+ def init(self, outbound: WorkflowOutboundInterceptor) -> None:
104
+ with_outbound_trace_ctx = _TraceContextWorkflowOutboundInterceptor(outbound)
105
+ super().init(with_outbound_trace_ctx)
106
+
107
+ async def execute_workflow(self, input: ExecuteWorkflowInput) -> Any: # noqa: A002
108
+ with _trace_context(input.headers):
109
+ return await super().execute_workflow(input)
110
+
111
+ async def handle_signal(self, input: HandleSignalInput) -> None: # noqa: A002
112
+ with _trace_context(input.headers):
113
+ return await super().handle_signal(input)
114
+
115
+ async def handle_query(self, input: HandleQueryInput) -> Any: # noqa: A002
116
+ with _trace_context(input.headers):
117
+ return await super().handle_query(input)
118
+
119
+
120
+ class _TraceContextWorkflowOutboundInterceptor(WorkflowOutboundInterceptor):
121
+ def continue_as_new(self, input: ContinueAsNewInput) -> NoReturn: # noqa: A002
122
+ super().continue_as_new(_with_trace_context_header(input))
123
+
124
+ async def signal_child_workflow(self, input: SignalChildWorkflowInput) -> None: # noqa: A002
125
+ return await super().signal_child_workflow(_with_trace_context_header(input))
126
+
127
+ async def signal_external_workflow(
128
+ self,
129
+ input: SignalExternalWorkflowInput, # noqa: A002
130
+ ) -> None:
131
+ return await super().signal_external_workflow(_with_trace_context_header(input))
132
+
133
+ def start_activity(self, input: StartActivityInput) -> ActivityHandle[Any]: # noqa: A002
134
+ return super().start_activity(_with_trace_context_header(input))
135
+
136
+ async def start_child_workflow(
137
+ self,
138
+ input: StartChildWorkflowInput, # noqa: A002
139
+ ) -> ChildWorkflowHandle[Any, Any]:
140
+ return await super().start_child_workflow(_with_trace_context_header(input))
141
+
142
+ def start_local_activity(
143
+ self,
144
+ input: StartLocalActivityInput, # noqa: A002
145
+ ) -> ActivityHandle[Any]:
146
+ return super().start_local_activity(_with_trace_context_header(input))
147
+
148
+ async def start_nexus_operation(
149
+ self,
150
+ input: StartNexusOperationInput[InputT, OutputT], # noqa: A002
151
+ ) -> NexusOperationHandle[OutputT]:
152
+ return await super().start_nexus_operation(_with_trace_context_header(input))
153
+
154
+
155
+ class _TraceContextActivityInboundInterceptor(ActivityInboundInterceptor):
156
+ async def execute_activity(self, input: ExecuteActivityInput) -> Any: # noqa: A002
157
+ with _trace_context(input.headers):
158
+ return await super().execute_activity(input)
159
+
160
+
161
+ def get_trace_context() -> TraceContext | None:
162
+ return _TRACE_CONTEXT.get()
163
+
164
+
165
+ @contextmanager
166
+ def _trace_context(headers: Mapping[str, Payload]) -> Generator[None, None, None]:
167
+ ctx = headers.get(_TRACEPARENT)
168
+ if ctx is not None:
169
+ ctx = _DEFAULT_PAYLOAD_CONVERTER.from_payloads(
170
+ [headers.get(_TRACEPARENT)], None
171
+ )[0]
172
+ ctx = TraceContext.from_traceparent(ctx)
173
+ else:
174
+ ctx = TraceContext.next_span(None)
175
+ tok = None
176
+ try:
177
+ tok = _TRACE_CONTEXT.set(ctx)
178
+ yield
179
+ finally:
180
+ if tok is not None:
181
+ _TRACE_CONTEXT.reset(tok)
182
+
183
+
184
+ InputWithHeaders = TypeVar("InputWithHeaders")
185
+
186
+
187
+ def _with_trace_context_header(
188
+ input_with_headers: InputWithHeaders,
189
+ ) -> InputWithHeaders:
190
+ ctx = get_trace_context()
191
+ if ctx is None:
192
+ return input_with_headers
193
+ new_obj = deepcopy(input_with_headers)
194
+ next_ctx = TraceContext.next_span(ctx)
195
+ new_obj.headers[_TRACEPARENT] = _DEFAULT_PAYLOAD_CONVERTER.to_payload(
196
+ next_ctx.traceparent
197
+ )
198
+ return new_obj
@@ -0,0 +1,86 @@
1
+ import logging
2
+ import sys
3
+ from copy import copy
4
+
5
+ from icij_common.logging_utils import (
6
+ DATE_FMT,
7
+ STREAM_HANDLER_FMT,
8
+ STREAM_HANDLER_FMT_WITH_WORKER_ID,
9
+ )
10
+ from pythonjsonlogger.core import RESERVED_ATTRS, BaseJsonFormatter
11
+ from pythonjsonlogger.orjson import OrjsonFormatter
12
+ from temporalio import activity, workflow
13
+
14
+ from .config import LogLevel
15
+ from .interceptors import get_trace_context
16
+
17
+ _ACT_LOGGER_ATTRS = ["activity_type", "activity_id", "activity_run_id"]
18
+ _WF_LOGGED_ATTRS = ["workflow_type", "workflow_id", "workflow_run_id"]
19
+ _TRACE_CONTEXT_ATTRS = ["trace_id", "parent_id", "traceparent"]
20
+ _LOGGED_ATTRIBUTES = (
21
+ copy(RESERVED_ATTRS)
22
+ + _WF_LOGGED_ATTRS
23
+ + _ACT_LOGGER_ATTRS
24
+ + _TRACE_CONTEXT_ATTRS
25
+ + ["worker_id"]
26
+ )
27
+
28
+
29
+ def setup_worker_loggers(
30
+ loggers: dict[str, LogLevel], *, worker_id: str | None, in_json: bool
31
+ ) -> None:
32
+ worker_filter = WorkerFilter(worker_id)
33
+ for logger_name, level_str in loggers.items():
34
+ level = getattr(logging, level_str)
35
+ logger = logging.getLogger(logger_name)
36
+ logger.setLevel(level)
37
+ logger.handlers = []
38
+ for handler in _get_worker_handlers(level, worker_id, in_json=in_json):
39
+ logger.addHandler(handler)
40
+ logger.addFilter(worker_filter)
41
+
42
+
43
+ def _get_worker_handlers(
44
+ level: int, worker_id: str | None, *, in_json: bool
45
+ ) -> list[logging.Handler]:
46
+ stream_handler = logging.StreamHandler(sys.stderr)
47
+ if in_json:
48
+ fmt = _json_formatter(datefmt=DATE_FMT)
49
+ else:
50
+ if worker_id is not None:
51
+ fmt = STREAM_HANDLER_FMT_WITH_WORKER_ID
52
+ else:
53
+ fmt = STREAM_HANDLER_FMT
54
+ fmt = logging.Formatter(fmt, DATE_FMT)
55
+ stream_handler.setFormatter(fmt)
56
+ stream_handler.setLevel(level)
57
+ return [stream_handler]
58
+
59
+
60
+ class WorkerFilter(logging.Filter):
61
+ def __init__(self, worker_id: str) -> None:
62
+ super().__init__()
63
+ self._worker_id = worker_id
64
+
65
+ def filter(self, record: logging.LogRecord) -> bool:
66
+ record.worker_id = self._worker_id
67
+ if workflow.in_workflow():
68
+ wf_info = workflow.info()
69
+ for attr in _WF_LOGGED_ATTRS:
70
+ setattr(record, attr, getattr(wf_info, attr))
71
+ if activity.in_activity():
72
+ act_info = activity.info()
73
+ for attr in _ACT_LOGGER_ATTRS:
74
+ setattr(record, attr, getattr(act_info, attr))
75
+ trace_context = get_trace_context()
76
+ if trace_context is not None:
77
+ for attr in _TRACE_CONTEXT_ATTRS:
78
+ setattr(record, attr, getattr(trace_context, attr))
79
+ return True
80
+
81
+
82
+ def _json_formatter(datefmt: str) -> BaseJsonFormatter:
83
+ fmt = OrjsonFormatter( # let's keep logging as fast as possible
84
+ _LOGGED_ATTRIBUTES, datefmt=datefmt
85
+ )
86
+ return fmt
@@ -338,6 +338,8 @@ def activity_defn(
338
338
  retriables: set[type[Exception]] = None,
339
339
  ) -> Callable[[Callable[P, T]], Callable[P, T]]:
340
340
  def decorator(activity_fn: Callable[P, T]) -> Callable[P, T]:
341
+ # TODO: some of these could probably be reimplemented more elegantly using
342
+ # temporal interceptors: https://docs.temporal.io/develop/python/workers/interceptors
341
343
  activity_fn = positional_args_only(activity_fn)
342
344
  activity_fn = with_retriables(retriables)(activity_fn)
343
345
  if supports_progress(activity_fn):
@@ -8,6 +8,7 @@ from asyncio import AbstractEventLoop
8
8
  from collections.abc import AsyncGenerator, Callable
9
9
  from concurrent.futures import ThreadPoolExecutor
10
10
  from contextlib import asynccontextmanager
11
+ from copy import copy
11
12
  from typing import Any
12
13
 
13
14
  from temporalio.worker import PollerBehaviorSimpleMaximum, Worker
@@ -15,6 +16,7 @@ from temporalio.worker import PollerBehaviorSimpleMaximum, Worker
15
16
  from .config import WorkerConfig
16
17
  from .dependencies import with_dependencies
17
18
  from .discovery import Activity
19
+ from .interceptors import TraceContextInterceptor
18
20
  from .types_ import ContextManagerFactory, TemporalClient
19
21
 
20
22
  logger = logging.getLogger(__name__)
@@ -83,9 +85,10 @@ def datashare_worker(
83
85
  max_concurrent_activities = 1
84
86
  if workflows:
85
87
  logger.warning(_SEPARATE_IO_AND_CPU_WORKERS)
86
-
88
+ interceptors = [TraceContextInterceptor()]
87
89
  return DatashareWorker(
88
90
  client,
91
+ interceptors=interceptors,
89
92
  identity=worker_id,
90
93
  workflows=workflows,
91
94
  activities=activities,
@@ -142,12 +145,27 @@ async def worker_context(
142
145
  task_queue: str,
143
146
  dependencies: list[ContextManagerFactory] | None = None,
144
147
  ) -> AsyncGenerator[DatashareWorker, None]:
148
+ discovered = []
149
+ if activities is not None:
150
+ discovered.extend(activities)
151
+ if workflows is not None:
152
+ discovered.extend(workflows)
153
+ if dependencies is not None:
154
+ discovered.extend(dependencies)
155
+ discovered.append(worker_config)
156
+ loggers = copy(worker_config.logging.loggers)
157
+ discovered_loggers = {_get_object_package(o).__name__ for o in discovered}
158
+ for logger in discovered_loggers:
159
+ if logger not in loggers:
160
+ # Log in info by default
161
+ loggers[logger] = "INFO"
145
162
  deps_cm = (
146
163
  with_dependencies(
147
164
  dependencies,
148
165
  worker_config=worker_config,
149
166
  worker_id=worker_id,
150
167
  event_loop=event_loop,
168
+ loggers=loggers,
151
169
  )
152
170
  if dependencies
153
171
  else _do_nothing_cm()
@@ -181,3 +199,9 @@ def _get_class_from_method(method: Callable) -> type:
181
199
  class_name = method.__qualname__.rsplit(".", 1)[0]
182
200
  module = sys.modules[method.__module__]
183
201
  return getattr(module, class_name)
202
+
203
+
204
+ def _get_object_package(obj: Any) -> Any:
205
+ mod = inspect.getmodule(obj)
206
+ base, _, _ = mod.__name__.partition(".")
207
+ return sys.modules[base]
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "datashare-python"
3
- version = "0.6.3"
3
+ version = "0.7.1"
4
4
  description = "Manage Python tasks and local resources in Datashare"
5
5
  authors = [
6
6
  { name = "Clément Doumouro", email = "cdoumouro@icij.org" },
@@ -10,16 +10,17 @@ authors = [
10
10
  readme = "README.md"
11
11
  requires-python = ">=3.11,<4"
12
12
  dependencies = [
13
- "alive-progress~=3.2.0",
14
- "aiohttp~=3.11.9",
13
+ "alive-progress~=3.2",
14
+ "aiohttp~=3.11",
15
15
  "icij-common[elasticsearch]~=0.8.2",
16
- "python-json-logger~=4.0.0",
17
- "nest-asyncio~=1.6.0",
18
- "temporalio~=1.23.0",
16
+ "python-json-logger~=4.0",
17
+ "nest-asyncio~=1.6",
18
+ "temporalio~=1.23",
19
19
  "typer~=0.15.4",
20
20
  "tomlkit~=0.14.0",
21
- "hatchling~=1.27.0",
21
+ "hatchling~=1.27",
22
22
  "pyyaml~=6.0",
23
+ "orjson~=3.11",
23
24
  ]
24
25
 
25
26
  [project.urls]