datashare-python 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  from pathlib import Path
2
- from typing import Annotated, Literal
2
+ from typing import Literal
3
3
 
4
4
  from icij_common.es import ESClient
5
5
  from icij_common.pydantic_utils import ICIJSettings
6
- from pydantic import Field, PrivateAttr
6
+ from pydantic import PrivateAttr
7
7
  from pydantic_settings import SettingsConfigDict
8
8
  from temporalio.contrib.pydantic import PydanticJSONPlainPayloadConverter, ToJsonOptions
9
9
  from temporalio.converter import (
@@ -83,12 +83,14 @@ class LoggingConfig(BaseModel):
83
83
  loggers: dict[str, LogLevel]
84
84
 
85
85
 
86
+ _DEFAULT_LOGGERS = {datashare_python.__name__: "INFO"}
87
+ _DEFAULT_LOGGING_CONFIG = LoggingConfig(log_in_json=True, loggers=_DEFAULT_LOGGERS)
88
+
89
+
86
90
  class WorkerConfig(ICIJSettings, BaseModel):
87
91
  model_config = DS_WORKER_SETTINGS_CONFIG
88
92
 
89
- logging: Annotated[LoggingConfig, Field(frozen=True)] = {
90
- datashare_python.__name__: "INFO"
91
- }
93
+ logging: LoggingConfig = _DEFAULT_LOGGING_CONFIG
92
94
 
93
95
  datashare: DatashareClientConfig = DatashareClientConfig()
94
96
  elasticsearch: ESClientConfig = ESClientConfig()
@@ -8,6 +8,7 @@ import nest_asyncio
8
8
  import pytest
9
9
  from elasticsearch._async.helpers import async_streaming_bulk
10
10
  from icij_common.es import DOC_ROOT_ID, ES_DOCUMENT_TYPE, ID, ESClient
11
+ from icij_common.test_utils import reset_env # noqa: F401
11
12
  from temporalio import workflow
12
13
 
13
14
  from datashare_python.config import (
@@ -0,0 +1,198 @@
1
+ import secrets
2
+ from collections.abc import Generator, Mapping
3
+ from contextlib import contextmanager
4
+ from contextvars import ContextVar
5
+ from copy import deepcopy
6
+ from typing import Annotated, Any, NoReturn, Self, TypeVar
7
+
8
+ from nexusrpc import InputT, OutputT
9
+ from pydantic import Field
10
+ from temporalio.api.common.v1 import Payload
11
+ from temporalio.converter import DataConverter
12
+ from temporalio.worker import (
13
+ ActivityInboundInterceptor,
14
+ ContinueAsNewInput,
15
+ ExecuteActivityInput,
16
+ ExecuteWorkflowInput,
17
+ HandleQueryInput,
18
+ HandleSignalInput,
19
+ Interceptor,
20
+ SignalChildWorkflowInput,
21
+ SignalExternalWorkflowInput,
22
+ StartActivityInput,
23
+ StartChildWorkflowInput,
24
+ StartLocalActivityInput,
25
+ StartNexusOperationInput,
26
+ WorkflowInboundInterceptor,
27
+ WorkflowInterceptorClassInput,
28
+ WorkflowOutboundInterceptor,
29
+ )
30
+ from temporalio.workflow import (
31
+ ActivityHandle,
32
+ ChildWorkflowHandle,
33
+ NexusOperationHandle,
34
+ )
35
+
36
+ from .objects import BaseModel
37
+
38
+ _TRACEPARENT = "traceparent"
39
+ _DEFAULT_PAYLOAD_CONVERTER = DataConverter.default.payload_converter
40
+
41
+
42
+ class TraceContext(BaseModel):
43
+ # https://www.w3.org/TR/trace-context/
44
+ version: Annotated[str, Field(frozen=True)] = "00"
45
+ trace_id: str
46
+ parent_id: str
47
+ sampled: bool = True
48
+
49
+ def __hash__(self) -> int:
50
+ return hash((self.trace_id, self.parent_id, self.sampled))
51
+
52
+ @classmethod
53
+ def next_span(cls, parent: Self | None) -> Self:
54
+ new_span_id = secrets.token_hex(8)
55
+ if parent is None:
56
+ trace_id = secrets.token_hex(16)
57
+ return TraceContext(trace_id=trace_id, parent_id=new_span_id)
58
+ return TraceContext(
59
+ trace_id=parent.trace_id, parent_id=new_span_id, sampled=parent.sampled
60
+ )
61
+
62
+ @property
63
+ def traceparent(self) -> str:
64
+ flags = "01" if self.sampled else "00"
65
+ return f"{self.version}-{self.trace_id}-{self.parent_id}-{flags}"
66
+
67
+ @classmethod
68
+ def from_traceparent(cls, traceparent: str) -> Self:
69
+ split = traceparent.split("-")
70
+ if len(split) != 4:
71
+ raise ValueError(f"invalid trace parent: {traceparent}")
72
+ version, trace_id, parent_id, flags = split
73
+ if version != "00":
74
+ msg = (
75
+ f"unsupported trace parent version {version} "
76
+ f"for traceparent {traceparent}"
77
+ )
78
+ raise ValueError(msg)
79
+ sampled = flags == "01"
80
+ return cls(trace_id=trace_id, parent_id=parent_id, sampled=sampled)
81
+
82
+
83
+ _TRACE_CONTEXT: ContextVar[TraceContext | None] = ContextVar(
84
+ "trace_context", default=None
85
+ )
86
+
87
+
88
+ class TraceContextInterceptor(Interceptor):
89
+ def workflow_interceptor_class(
90
+ self,
91
+ input: WorkflowInterceptorClassInput, # noqa: A002, ARG002
92
+ ) -> type[WorkflowInboundInterceptor] | None:
93
+ return _TraceContextWorkflowInboundInterceptor
94
+
95
+ def intercept_activity(
96
+ self,
97
+ next: ActivityInboundInterceptor, # noqa: A002
98
+ ) -> ActivityInboundInterceptor:
99
+ return _TraceContextActivityInboundInterceptor(next)
100
+
101
+
102
+ class _TraceContextWorkflowInboundInterceptor(WorkflowInboundInterceptor):
103
+ def init(self, outbound: WorkflowOutboundInterceptor) -> None:
104
+ with_outbound_trace_ctx = _TraceContextWorkflowOutboundInterceptor(outbound)
105
+ super().init(with_outbound_trace_ctx)
106
+
107
+ async def execute_workflow(self, input: ExecuteWorkflowInput) -> Any: # noqa: A002
108
+ with _trace_context(input.headers):
109
+ return await super().execute_workflow(input)
110
+
111
+ async def handle_signal(self, input: HandleSignalInput) -> None: # noqa: A002
112
+ with _trace_context(input.headers):
113
+ return await super().handle_signal(input)
114
+
115
+ async def handle_query(self, input: HandleQueryInput) -> Any: # noqa: A002
116
+ with _trace_context(input.headers):
117
+ return await super().handle_query(input)
118
+
119
+
120
+ class _TraceContextWorkflowOutboundInterceptor(WorkflowOutboundInterceptor):
121
+ def continue_as_new(self, input: ContinueAsNewInput) -> NoReturn: # noqa: A002
122
+ super().continue_as_new(_with_trace_context_header(input))
123
+
124
+ async def signal_child_workflow(self, input: SignalChildWorkflowInput) -> None: # noqa: A002
125
+ return await super().signal_child_workflow(_with_trace_context_header(input))
126
+
127
+ async def signal_external_workflow(
128
+ self,
129
+ input: SignalExternalWorkflowInput, # noqa: A002
130
+ ) -> None:
131
+ return await super().signal_external_workflow(_with_trace_context_header(input))
132
+
133
+ def start_activity(self, input: StartActivityInput) -> ActivityHandle[Any]: # noqa: A002
134
+ return super().start_activity(_with_trace_context_header(input))
135
+
136
+ async def start_child_workflow(
137
+ self,
138
+ input: StartChildWorkflowInput, # noqa: A002
139
+ ) -> ChildWorkflowHandle[Any, Any]:
140
+ return await super().start_child_workflow(_with_trace_context_header(input))
141
+
142
+ def start_local_activity(
143
+ self,
144
+ input: StartLocalActivityInput, # noqa: A002
145
+ ) -> ActivityHandle[Any]:
146
+ return super().start_local_activity(_with_trace_context_header(input))
147
+
148
+ async def start_nexus_operation(
149
+ self,
150
+ input: StartNexusOperationInput[InputT, OutputT], # noqa: A002
151
+ ) -> NexusOperationHandle[OutputT]:
152
+ return await super().start_nexus_operation(_with_trace_context_header(input))
153
+
154
+
155
+ class _TraceContextActivityInboundInterceptor(ActivityInboundInterceptor):
156
+ async def execute_activity(self, input: ExecuteActivityInput) -> Any: # noqa: A002
157
+ with _trace_context(input.headers):
158
+ return await super().execute_activity(input)
159
+
160
+
161
+ def get_trace_context() -> TraceContext | None:
162
+ return _TRACE_CONTEXT.get()
163
+
164
+
165
+ @contextmanager
166
+ def _trace_context(headers: Mapping[str, Payload]) -> Generator[None, None, None]:
167
+ ctx = headers.get(_TRACEPARENT)
168
+ if ctx is not None:
169
+ ctx = _DEFAULT_PAYLOAD_CONVERTER.from_payloads(
170
+ [headers.get(_TRACEPARENT)], None
171
+ )[0]
172
+ ctx = TraceContext.from_traceparent(ctx)
173
+ else:
174
+ ctx = TraceContext.next_span(None)
175
+ tok = None
176
+ try:
177
+ tok = _TRACE_CONTEXT.set(ctx)
178
+ yield
179
+ finally:
180
+ if tok is not None:
181
+ _TRACE_CONTEXT.reset(tok)
182
+
183
+
184
+ InputWithHeaders = TypeVar("InputWithHeaders")
185
+
186
+
187
+ def _with_trace_context_header(
188
+ input_with_headers: InputWithHeaders,
189
+ ) -> InputWithHeaders:
190
+ ctx = get_trace_context()
191
+ if ctx is None:
192
+ return input_with_headers
193
+ new_obj = deepcopy(input_with_headers)
194
+ next_ctx = TraceContext.next_span(ctx)
195
+ new_obj.headers[_TRACEPARENT] = _DEFAULT_PAYLOAD_CONVERTER.to_payload(
196
+ next_ctx.traceparent
197
+ )
198
+ return new_obj
@@ -12,20 +12,17 @@ from pythonjsonlogger.orjson import OrjsonFormatter
12
12
  from temporalio import activity, workflow
13
13
 
14
14
  from .config import LogLevel
15
+ from .interceptors import get_trace_context
15
16
 
16
- _ACT_LOGGER_ATTRS = [
17
- "activity_type",
18
- "activity_id",
19
- "activity_run_id",
20
- ]
21
-
22
- _WF_LOGGED_ATTRS = [
23
- "workflow_type",
24
- "workflow_id",
25
- "workflow_run_id",
26
- ]
17
+ _ACT_LOGGER_ATTRS = ["activity_type", "activity_id", "activity_run_id"]
18
+ _WF_LOGGED_ATTRS = ["workflow_type", "workflow_id", "workflow_run_id"]
19
+ _TRACE_CONTEXT_ATTRS = ["trace_id", "parent_id", "traceparent"]
27
20
  _LOGGED_ATTRIBUTES = (
28
- copy(RESERVED_ATTRS) + _WF_LOGGED_ATTRS + _ACT_LOGGER_ATTRS + ["worker_id"]
21
+ copy(RESERVED_ATTRS)
22
+ + _WF_LOGGED_ATTRS
23
+ + _ACT_LOGGER_ATTRS
24
+ + _TRACE_CONTEXT_ATTRS
25
+ + ["worker_id"]
29
26
  )
30
27
 
31
28
 
@@ -48,7 +45,7 @@ def _get_worker_handlers(
48
45
  ) -> list[logging.Handler]:
49
46
  stream_handler = logging.StreamHandler(sys.stderr)
50
47
  if in_json:
51
- fmt = _json_formatter(datefmt=DATE_FMT, worker_id=worker_id)
48
+ fmt = _json_formatter(datefmt=DATE_FMT)
52
49
  else:
53
50
  if worker_id is not None:
54
51
  fmt = STREAM_HANDLER_FMT_WITH_WORKER_ID
@@ -75,13 +72,15 @@ class WorkerFilter(logging.Filter):
75
72
  act_info = activity.info()
76
73
  for attr in _ACT_LOGGER_ATTRS:
77
74
  setattr(record, attr, getattr(act_info, attr))
75
+ trace_context = get_trace_context()
76
+ if trace_context is not None:
77
+ for attr in _TRACE_CONTEXT_ATTRS:
78
+ setattr(record, attr, getattr(trace_context, attr))
78
79
  return True
79
80
 
80
81
 
81
- def _json_formatter(datefmt: str, worker_id: str) -> BaseJsonFormatter:
82
+ def _json_formatter(datefmt: str) -> BaseJsonFormatter:
82
83
  fmt = OrjsonFormatter( # let's keep logging as fast as possible
83
- _LOGGED_ATTRIBUTES,
84
- extra={"worker_id": worker_id},
85
- datefmt=datefmt,
84
+ _LOGGED_ATTRIBUTES, datefmt=datefmt
86
85
  )
87
86
  return fmt
datashare_python/utils.py CHANGED
@@ -338,6 +338,8 @@ def activity_defn(
338
338
  retriables: set[type[Exception]] = None,
339
339
  ) -> Callable[[Callable[P, T]], Callable[P, T]]:
340
340
  def decorator(activity_fn: Callable[P, T]) -> Callable[P, T]:
341
+ # TODO: some of these could probably be reimplemented more elegantly using
342
+ # temporal interceptors: https://docs.temporal.io/develop/python/workers/interceptors
341
343
  activity_fn = positional_args_only(activity_fn)
342
344
  activity_fn = with_retriables(retriables)(activity_fn)
343
345
  if supports_progress(activity_fn):
Binary file
@@ -16,6 +16,7 @@ from temporalio.worker import PollerBehaviorSimpleMaximum, Worker
16
16
  from .config import WorkerConfig
17
17
  from .dependencies import with_dependencies
18
18
  from .discovery import Activity
19
+ from .interceptors import TraceContextInterceptor
19
20
  from .types_ import ContextManagerFactory, TemporalClient
20
21
 
21
22
  logger = logging.getLogger(__name__)
@@ -84,9 +85,10 @@ def datashare_worker(
84
85
  max_concurrent_activities = 1
85
86
  if workflows:
86
87
  logger.warning(_SEPARATE_IO_AND_CPU_WORKERS)
87
-
88
+ interceptors = [TraceContextInterceptor()]
88
89
  return DatashareWorker(
89
90
  client,
91
+ interceptors=interceptors,
90
92
  identity=worker_id,
91
93
  workflows=workflows,
92
94
  activities=activities,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datashare-python
3
- Version: 0.7.0
3
+ Version: 0.7.2
4
4
  Summary: Manage Python tasks and local resources in Datashare
5
5
  Project-URL: Homepage, https://icij.github.io/datashare-python/
6
6
  Project-URL: Documentation, https://icij.github.io/datashare-python/
@@ -1,26 +1,27 @@
1
1
  datashare_python/.gitignore,sha256=e-SRgnvGGdsjRrqgKsTzALz6Obx8IYiOjr0yaAxT6v8,22
2
2
  datashare_python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  datashare_python/__main__.py,sha256=g-fvS46zl9umKmGrSpl-OG-8PSuZgjqvTCqjpsZtSps,101
4
- datashare_python/config.py,sha256=Q4iu3ZGaQB7npaHJWclxPIfgzZTf_-8VxxhXrB9nlpE,3928
5
- datashare_python/conftest.py,sha256=_VESUGax8wt0OlmdJmDFyTJlGO14bt1ccXsjHor0XVk,8592
4
+ datashare_python/config.py,sha256=_Cx4EB1yHXXcLUtw1OBlMk2SKcJRwxqJwgRu6klbxNg,3994
5
+ datashare_python/conftest.py,sha256=MrmQKFcUipm_qn-cHsLovZMwMMtVxyK0s1lmKEx54bc,8651
6
6
  datashare_python/constants.py,sha256=a8-ceZKBVMXydcoNQ35fSjFjxeJ7dt-N6eAvqtPpf9g,320
7
7
  datashare_python/dependencies.py,sha256=KJuAp6Dmv8DQuFnGjbWiHu7StzZj97eBPDyZ_RfCQRc,4141
8
8
  datashare_python/discovery.py,sha256=BPB_Ak6d1-vcf9vAQA63IRb2U8h83_mIIi8MbKbFzQ0,7020
9
9
  datashare_python/exceptions.py,sha256=bVHEAXxDPKfxeeMC0hJXEsrJkgsKO2ESAhxWU96GA4M,496
10
- datashare_python/logging_.py,sha256=bhOI5ynVrTe2bU3jUNbaWXfe4zQEG_lLQxJ3Esrz9LQ,2589
10
+ datashare_python/interceptors.py,sha256=Pl7GodPO4KbfflmacpW-vOUgLazjlXSlDNENbpOUt1c,6725
11
+ datashare_python/logging_.py,sha256=-qHz4ztKz4mOCO2z4wunQ4M3xoVhztNvxaiozuMLFRM,2815
11
12
  datashare_python/objects.py,sha256=pE0DGNNkl1etxz5ed7T-EaGo1o9TONjH2Lg9u1qdAWU,7571
12
13
  datashare_python/task_client.py,sha256=oTmP8bvZW0UyhLNMi1AV3XIAx7hrdbxNRss2Mw2azEc,8435
13
14
  datashare_python/template.py,sha256=RxKTYLXoS_EQ8Jc41JkBXppPdbCFqDWfP3BmC0gvB5o,4024
14
15
  datashare_python/types_.py,sha256=9Hk1XqpdXbM1TnEzwvJ5G9ABbaCZW9KgBTtiPBVn_7k,649
15
- datashare_python/utils.py,sha256=ZGZKO9Q4_aLVVilZUCkmHQ21M_37hVOCr7G-qZPOflU,17234
16
- datashare_python/worker-template.tar.gz,sha256=Krs3td9sVkwXP9KPfIdTUgYWUNoU0G5YLGnn_cNxeTY,280218
17
- datashare_python/worker.py,sha256=M_I8AL3KZbKeA7-ObaoMUxz3pHBNslJD0XChOcmyafk,7076
16
+ datashare_python/utils.py,sha256=inVjtlBbgL88mN0UM73SSzW76koTW5MGC0NlyopqRW4,17412
17
+ datashare_python/worker-template.tar.gz,sha256=4AmwMeltOcUWe2bv4tslriE-q8T3CV2fOs_1o96YUNM,286533
18
+ datashare_python/worker.py,sha256=1FdmwYKWKYUKteTM3RC6kFQHR02q8NUDe91hv68QPEo,7207
18
19
  datashare_python/cli/__init__.py,sha256=9BPWtssDgsVfWMsZ1TtZCla0EC_kai4RHttr8oNLYOE,1401
19
20
  datashare_python/cli/project.py,sha256=w32Gy9AOL5B00uDT4in7YUCt2g68FnNbvwg2M3a8G6o,946
20
21
  datashare_python/cli/task.py,sha256=8mvKGS21bZ14BgZ0Uo-dfameljkaI2ZBha80ywCy-E8,5822
21
22
  datashare_python/cli/utils.py,sha256=p69CQb0zfixuyBkiZprhdMCc_NuYwXyAn6vC9H1UzAw,911
22
23
  datashare_python/cli/worker.py,sha256=I4KTpFIpXFowioFn72Rm6LBCYlY-Dhp4NBIPvtRgUXE,5283
23
- datashare_python-0.7.0.dist-info/METADATA,sha256=GRZEpX-eTktdvsnSyrvyKzG9kgjqyW4tFs_FDVaEQWU,923
24
- datashare_python-0.7.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
25
- datashare_python-0.7.0.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
26
- datashare_python-0.7.0.dist-info/RECORD,,
24
+ datashare_python-0.7.2.dist-info/METADATA,sha256=we0tcO0B0fM92BDbLZT0xrV1sRlmN7gElv0i6dTfZmo,923
25
+ datashare_python-0.7.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
26
+ datashare_python-0.7.2.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
27
+ datashare_python-0.7.2.dist-info/RECORD,,