datashare-python 0.7.0__py3-none-any.whl → 0.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datashare_python/conftest.py +1 -0
- datashare_python/interceptors.py +198 -0
- datashare_python/logging_.py +16 -17
- datashare_python/utils.py +2 -0
- datashare_python/worker-template.tar.gz +0 -0
- datashare_python/worker.py +3 -1
- {datashare_python-0.7.0.dist-info → datashare_python-0.7.1.dist-info}/METADATA +1 -1
- {datashare_python-0.7.0.dist-info → datashare_python-0.7.1.dist-info}/RECORD +10 -9
- {datashare_python-0.7.0.dist-info → datashare_python-0.7.1.dist-info}/WHEEL +0 -0
- {datashare_python-0.7.0.dist-info → datashare_python-0.7.1.dist-info}/entry_points.txt +0 -0
datashare_python/conftest.py
CHANGED
|
@@ -8,6 +8,7 @@ import nest_asyncio
|
|
|
8
8
|
import pytest
|
|
9
9
|
from elasticsearch._async.helpers import async_streaming_bulk
|
|
10
10
|
from icij_common.es import DOC_ROOT_ID, ES_DOCUMENT_TYPE, ID, ESClient
|
|
11
|
+
from icij_common.test_utils import reset_env # noqa: F401
|
|
11
12
|
from temporalio import workflow
|
|
12
13
|
|
|
13
14
|
from datashare_python.config import (
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
import secrets
|
|
2
|
+
from collections.abc import Generator, Mapping
|
|
3
|
+
from contextlib import contextmanager
|
|
4
|
+
from contextvars import ContextVar
|
|
5
|
+
from copy import deepcopy
|
|
6
|
+
from typing import Annotated, Any, NoReturn, Self, TypeVar
|
|
7
|
+
|
|
8
|
+
from nexusrpc import InputT, OutputT
|
|
9
|
+
from pydantic import Field
|
|
10
|
+
from temporalio.api.common.v1 import Payload
|
|
11
|
+
from temporalio.converter import DataConverter
|
|
12
|
+
from temporalio.worker import (
|
|
13
|
+
ActivityInboundInterceptor,
|
|
14
|
+
ContinueAsNewInput,
|
|
15
|
+
ExecuteActivityInput,
|
|
16
|
+
ExecuteWorkflowInput,
|
|
17
|
+
HandleQueryInput,
|
|
18
|
+
HandleSignalInput,
|
|
19
|
+
Interceptor,
|
|
20
|
+
SignalChildWorkflowInput,
|
|
21
|
+
SignalExternalWorkflowInput,
|
|
22
|
+
StartActivityInput,
|
|
23
|
+
StartChildWorkflowInput,
|
|
24
|
+
StartLocalActivityInput,
|
|
25
|
+
StartNexusOperationInput,
|
|
26
|
+
WorkflowInboundInterceptor,
|
|
27
|
+
WorkflowInterceptorClassInput,
|
|
28
|
+
WorkflowOutboundInterceptor,
|
|
29
|
+
)
|
|
30
|
+
from temporalio.workflow import (
|
|
31
|
+
ActivityHandle,
|
|
32
|
+
ChildWorkflowHandle,
|
|
33
|
+
NexusOperationHandle,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
from .objects import BaseModel
|
|
37
|
+
|
|
38
|
+
_TRACEPARENT = "traceparent"
|
|
39
|
+
_DEFAULT_PAYLOAD_CONVERTER = DataConverter.default.payload_converter
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class TraceContext(BaseModel):
|
|
43
|
+
# https://www.w3.org/TR/trace-context/
|
|
44
|
+
version: Annotated[str, Field(frozen=True)] = "00"
|
|
45
|
+
trace_id: str
|
|
46
|
+
parent_id: str
|
|
47
|
+
sampled: bool = True
|
|
48
|
+
|
|
49
|
+
def __hash__(self) -> int:
|
|
50
|
+
return hash((self.trace_id, self.parent_id, self.sampled))
|
|
51
|
+
|
|
52
|
+
@classmethod
|
|
53
|
+
def next_span(cls, parent: Self | None) -> Self:
|
|
54
|
+
new_span_id = secrets.token_hex(8)
|
|
55
|
+
if parent is None:
|
|
56
|
+
trace_id = secrets.token_hex(16)
|
|
57
|
+
return TraceContext(trace_id=trace_id, parent_id=new_span_id)
|
|
58
|
+
return TraceContext(
|
|
59
|
+
trace_id=parent.trace_id, parent_id=new_span_id, sampled=parent.sampled
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
@property
|
|
63
|
+
def traceparent(self) -> str:
|
|
64
|
+
flags = "01" if self.sampled else "00"
|
|
65
|
+
return f"{self.version}-{self.trace_id}-{self.parent_id}-{flags}"
|
|
66
|
+
|
|
67
|
+
@classmethod
|
|
68
|
+
def from_traceparent(cls, traceparent: str) -> Self:
|
|
69
|
+
split = traceparent.split("-")
|
|
70
|
+
if len(split) != 4:
|
|
71
|
+
raise ValueError(f"invalid trace parent: {traceparent}")
|
|
72
|
+
version, trace_id, parent_id, flags = split
|
|
73
|
+
if version != "00":
|
|
74
|
+
msg = (
|
|
75
|
+
f"unsupported trace parent version {version} "
|
|
76
|
+
f"for traceparent {traceparent}"
|
|
77
|
+
)
|
|
78
|
+
raise ValueError(msg)
|
|
79
|
+
sampled = flags == "01"
|
|
80
|
+
return cls(trace_id=trace_id, parent_id=parent_id, sampled=sampled)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
_TRACE_CONTEXT: ContextVar[TraceContext | None] = ContextVar(
|
|
84
|
+
"trace_context", default=None
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class TraceContextInterceptor(Interceptor):
|
|
89
|
+
def workflow_interceptor_class(
|
|
90
|
+
self,
|
|
91
|
+
input: WorkflowInterceptorClassInput, # noqa: A002, ARG002
|
|
92
|
+
) -> type[WorkflowInboundInterceptor] | None:
|
|
93
|
+
return _TraceContextWorkflowInboundInterceptor
|
|
94
|
+
|
|
95
|
+
def intercept_activity(
|
|
96
|
+
self,
|
|
97
|
+
next: ActivityInboundInterceptor, # noqa: A002
|
|
98
|
+
) -> ActivityInboundInterceptor:
|
|
99
|
+
return _TraceContextActivityInboundInterceptor(next)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class _TraceContextWorkflowInboundInterceptor(WorkflowInboundInterceptor):
|
|
103
|
+
def init(self, outbound: WorkflowOutboundInterceptor) -> None:
|
|
104
|
+
with_outbound_trace_ctx = _TraceContextWorkflowOutboundInterceptor(outbound)
|
|
105
|
+
super().init(with_outbound_trace_ctx)
|
|
106
|
+
|
|
107
|
+
async def execute_workflow(self, input: ExecuteWorkflowInput) -> Any: # noqa: A002
|
|
108
|
+
with _trace_context(input.headers):
|
|
109
|
+
return await super().execute_workflow(input)
|
|
110
|
+
|
|
111
|
+
async def handle_signal(self, input: HandleSignalInput) -> None: # noqa: A002
|
|
112
|
+
with _trace_context(input.headers):
|
|
113
|
+
return await super().handle_signal(input)
|
|
114
|
+
|
|
115
|
+
async def handle_query(self, input: HandleQueryInput) -> Any: # noqa: A002
|
|
116
|
+
with _trace_context(input.headers):
|
|
117
|
+
return await super().handle_query(input)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class _TraceContextWorkflowOutboundInterceptor(WorkflowOutboundInterceptor):
|
|
121
|
+
def continue_as_new(self, input: ContinueAsNewInput) -> NoReturn: # noqa: A002
|
|
122
|
+
super().continue_as_new(_with_trace_context_header(input))
|
|
123
|
+
|
|
124
|
+
async def signal_child_workflow(self, input: SignalChildWorkflowInput) -> None: # noqa: A002
|
|
125
|
+
return await super().signal_child_workflow(_with_trace_context_header(input))
|
|
126
|
+
|
|
127
|
+
async def signal_external_workflow(
|
|
128
|
+
self,
|
|
129
|
+
input: SignalExternalWorkflowInput, # noqa: A002
|
|
130
|
+
) -> None:
|
|
131
|
+
return await super().signal_external_workflow(_with_trace_context_header(input))
|
|
132
|
+
|
|
133
|
+
def start_activity(self, input: StartActivityInput) -> ActivityHandle[Any]: # noqa: A002
|
|
134
|
+
return super().start_activity(_with_trace_context_header(input))
|
|
135
|
+
|
|
136
|
+
async def start_child_workflow(
|
|
137
|
+
self,
|
|
138
|
+
input: StartChildWorkflowInput, # noqa: A002
|
|
139
|
+
) -> ChildWorkflowHandle[Any, Any]:
|
|
140
|
+
return await super().start_child_workflow(_with_trace_context_header(input))
|
|
141
|
+
|
|
142
|
+
def start_local_activity(
|
|
143
|
+
self,
|
|
144
|
+
input: StartLocalActivityInput, # noqa: A002
|
|
145
|
+
) -> ActivityHandle[Any]:
|
|
146
|
+
return super().start_local_activity(_with_trace_context_header(input))
|
|
147
|
+
|
|
148
|
+
async def start_nexus_operation(
|
|
149
|
+
self,
|
|
150
|
+
input: StartNexusOperationInput[InputT, OutputT], # noqa: A002
|
|
151
|
+
) -> NexusOperationHandle[OutputT]:
|
|
152
|
+
return await super().start_nexus_operation(_with_trace_context_header(input))
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
class _TraceContextActivityInboundInterceptor(ActivityInboundInterceptor):
|
|
156
|
+
async def execute_activity(self, input: ExecuteActivityInput) -> Any: # noqa: A002
|
|
157
|
+
with _trace_context(input.headers):
|
|
158
|
+
return await super().execute_activity(input)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def get_trace_context() -> TraceContext | None:
|
|
162
|
+
return _TRACE_CONTEXT.get()
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@contextmanager
|
|
166
|
+
def _trace_context(headers: Mapping[str, Payload]) -> Generator[None, None, None]:
|
|
167
|
+
ctx = headers.get(_TRACEPARENT)
|
|
168
|
+
if ctx is not None:
|
|
169
|
+
ctx = _DEFAULT_PAYLOAD_CONVERTER.from_payloads(
|
|
170
|
+
[headers.get(_TRACEPARENT)], None
|
|
171
|
+
)[0]
|
|
172
|
+
ctx = TraceContext.from_traceparent(ctx)
|
|
173
|
+
else:
|
|
174
|
+
ctx = TraceContext.next_span(None)
|
|
175
|
+
tok = None
|
|
176
|
+
try:
|
|
177
|
+
tok = _TRACE_CONTEXT.set(ctx)
|
|
178
|
+
yield
|
|
179
|
+
finally:
|
|
180
|
+
if tok is not None:
|
|
181
|
+
_TRACE_CONTEXT.reset(tok)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
InputWithHeaders = TypeVar("InputWithHeaders")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _with_trace_context_header(
|
|
188
|
+
input_with_headers: InputWithHeaders,
|
|
189
|
+
) -> InputWithHeaders:
|
|
190
|
+
ctx = get_trace_context()
|
|
191
|
+
if ctx is None:
|
|
192
|
+
return input_with_headers
|
|
193
|
+
new_obj = deepcopy(input_with_headers)
|
|
194
|
+
next_ctx = TraceContext.next_span(ctx)
|
|
195
|
+
new_obj.headers[_TRACEPARENT] = _DEFAULT_PAYLOAD_CONVERTER.to_payload(
|
|
196
|
+
next_ctx.traceparent
|
|
197
|
+
)
|
|
198
|
+
return new_obj
|
datashare_python/logging_.py
CHANGED
|
@@ -12,20 +12,17 @@ from pythonjsonlogger.orjson import OrjsonFormatter
|
|
|
12
12
|
from temporalio import activity, workflow
|
|
13
13
|
|
|
14
14
|
from .config import LogLevel
|
|
15
|
+
from .interceptors import get_trace_context
|
|
15
16
|
|
|
16
|
-
_ACT_LOGGER_ATTRS = [
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
"activity_run_id",
|
|
20
|
-
]
|
|
21
|
-
|
|
22
|
-
_WF_LOGGED_ATTRS = [
|
|
23
|
-
"workflow_type",
|
|
24
|
-
"workflow_id",
|
|
25
|
-
"workflow_run_id",
|
|
26
|
-
]
|
|
17
|
+
_ACT_LOGGER_ATTRS = ["activity_type", "activity_id", "activity_run_id"]
|
|
18
|
+
_WF_LOGGED_ATTRS = ["workflow_type", "workflow_id", "workflow_run_id"]
|
|
19
|
+
_TRACE_CONTEXT_ATTRS = ["trace_id", "parent_id", "traceparent"]
|
|
27
20
|
_LOGGED_ATTRIBUTES = (
|
|
28
|
-
copy(RESERVED_ATTRS)
|
|
21
|
+
copy(RESERVED_ATTRS)
|
|
22
|
+
+ _WF_LOGGED_ATTRS
|
|
23
|
+
+ _ACT_LOGGER_ATTRS
|
|
24
|
+
+ _TRACE_CONTEXT_ATTRS
|
|
25
|
+
+ ["worker_id"]
|
|
29
26
|
)
|
|
30
27
|
|
|
31
28
|
|
|
@@ -48,7 +45,7 @@ def _get_worker_handlers(
|
|
|
48
45
|
) -> list[logging.Handler]:
|
|
49
46
|
stream_handler = logging.StreamHandler(sys.stderr)
|
|
50
47
|
if in_json:
|
|
51
|
-
fmt = _json_formatter(datefmt=DATE_FMT
|
|
48
|
+
fmt = _json_formatter(datefmt=DATE_FMT)
|
|
52
49
|
else:
|
|
53
50
|
if worker_id is not None:
|
|
54
51
|
fmt = STREAM_HANDLER_FMT_WITH_WORKER_ID
|
|
@@ -75,13 +72,15 @@ class WorkerFilter(logging.Filter):
|
|
|
75
72
|
act_info = activity.info()
|
|
76
73
|
for attr in _ACT_LOGGER_ATTRS:
|
|
77
74
|
setattr(record, attr, getattr(act_info, attr))
|
|
75
|
+
trace_context = get_trace_context()
|
|
76
|
+
if trace_context is not None:
|
|
77
|
+
for attr in _TRACE_CONTEXT_ATTRS:
|
|
78
|
+
setattr(record, attr, getattr(trace_context, attr))
|
|
78
79
|
return True
|
|
79
80
|
|
|
80
81
|
|
|
81
|
-
def _json_formatter(datefmt: str
|
|
82
|
+
def _json_formatter(datefmt: str) -> BaseJsonFormatter:
|
|
82
83
|
fmt = OrjsonFormatter( # let's keep logging as fast as possible
|
|
83
|
-
_LOGGED_ATTRIBUTES,
|
|
84
|
-
extra={"worker_id": worker_id},
|
|
85
|
-
datefmt=datefmt,
|
|
84
|
+
_LOGGED_ATTRIBUTES, datefmt=datefmt
|
|
86
85
|
)
|
|
87
86
|
return fmt
|
datashare_python/utils.py
CHANGED
|
@@ -338,6 +338,8 @@ def activity_defn(
|
|
|
338
338
|
retriables: set[type[Exception]] = None,
|
|
339
339
|
) -> Callable[[Callable[P, T]], Callable[P, T]]:
|
|
340
340
|
def decorator(activity_fn: Callable[P, T]) -> Callable[P, T]:
|
|
341
|
+
# TODO: some of these could probably be reimplemented more elegantly using
|
|
342
|
+
# temporal interceptors: https://docs.temporal.io/develop/python/workers/interceptors
|
|
341
343
|
activity_fn = positional_args_only(activity_fn)
|
|
342
344
|
activity_fn = with_retriables(retriables)(activity_fn)
|
|
343
345
|
if supports_progress(activity_fn):
|
|
Binary file
|
datashare_python/worker.py
CHANGED
|
@@ -16,6 +16,7 @@ from temporalio.worker import PollerBehaviorSimpleMaximum, Worker
|
|
|
16
16
|
from .config import WorkerConfig
|
|
17
17
|
from .dependencies import with_dependencies
|
|
18
18
|
from .discovery import Activity
|
|
19
|
+
from .interceptors import TraceContextInterceptor
|
|
19
20
|
from .types_ import ContextManagerFactory, TemporalClient
|
|
20
21
|
|
|
21
22
|
logger = logging.getLogger(__name__)
|
|
@@ -84,9 +85,10 @@ def datashare_worker(
|
|
|
84
85
|
max_concurrent_activities = 1
|
|
85
86
|
if workflows:
|
|
86
87
|
logger.warning(_SEPARATE_IO_AND_CPU_WORKERS)
|
|
87
|
-
|
|
88
|
+
interceptors = [TraceContextInterceptor()]
|
|
88
89
|
return DatashareWorker(
|
|
89
90
|
client,
|
|
91
|
+
interceptors=interceptors,
|
|
90
92
|
identity=worker_id,
|
|
91
93
|
workflows=workflows,
|
|
92
94
|
activities=activities,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datashare-python
|
|
3
|
-
Version: 0.7.0
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: Manage Python tasks and local resources in Datashare
|
|
5
5
|
Project-URL: Homepage, https://icij.github.io/datashare-python/
|
|
6
6
|
Project-URL: Documentation, https://icij.github.io/datashare-python/
|
|
@@ -2,25 +2,26 @@ datashare_python/.gitignore,sha256=e-SRgnvGGdsjRrqgKsTzALz6Obx8IYiOjr0yaAxT6v8,2
|
|
|
2
2
|
datashare_python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
datashare_python/__main__.py,sha256=g-fvS46zl9umKmGrSpl-OG-8PSuZgjqvTCqjpsZtSps,101
|
|
4
4
|
datashare_python/config.py,sha256=Q4iu3ZGaQB7npaHJWclxPIfgzZTf_-8VxxhXrB9nlpE,3928
|
|
5
|
-
datashare_python/conftest.py,sha256=
|
|
5
|
+
datashare_python/conftest.py,sha256=MrmQKFcUipm_qn-cHsLovZMwMMtVxyK0s1lmKEx54bc,8651
|
|
6
6
|
datashare_python/constants.py,sha256=a8-ceZKBVMXydcoNQ35fSjFjxeJ7dt-N6eAvqtPpf9g,320
|
|
7
7
|
datashare_python/dependencies.py,sha256=KJuAp6Dmv8DQuFnGjbWiHu7StzZj97eBPDyZ_RfCQRc,4141
|
|
8
8
|
datashare_python/discovery.py,sha256=BPB_Ak6d1-vcf9vAQA63IRb2U8h83_mIIi8MbKbFzQ0,7020
|
|
9
9
|
datashare_python/exceptions.py,sha256=bVHEAXxDPKfxeeMC0hJXEsrJkgsKO2ESAhxWU96GA4M,496
|
|
10
|
-
datashare_python/
|
|
10
|
+
datashare_python/interceptors.py,sha256=Pl7GodPO4KbfflmacpW-vOUgLazjlXSlDNENbpOUt1c,6725
|
|
11
|
+
datashare_python/logging_.py,sha256=-qHz4ztKz4mOCO2z4wunQ4M3xoVhztNvxaiozuMLFRM,2815
|
|
11
12
|
datashare_python/objects.py,sha256=pE0DGNNkl1etxz5ed7T-EaGo1o9TONjH2Lg9u1qdAWU,7571
|
|
12
13
|
datashare_python/task_client.py,sha256=oTmP8bvZW0UyhLNMi1AV3XIAx7hrdbxNRss2Mw2azEc,8435
|
|
13
14
|
datashare_python/template.py,sha256=RxKTYLXoS_EQ8Jc41JkBXppPdbCFqDWfP3BmC0gvB5o,4024
|
|
14
15
|
datashare_python/types_.py,sha256=9Hk1XqpdXbM1TnEzwvJ5G9ABbaCZW9KgBTtiPBVn_7k,649
|
|
15
|
-
datashare_python/utils.py,sha256=
|
|
16
|
-
datashare_python/worker-template.tar.gz,sha256=
|
|
17
|
-
datashare_python/worker.py,sha256=
|
|
16
|
+
datashare_python/utils.py,sha256=inVjtlBbgL88mN0UM73SSzW76koTW5MGC0NlyopqRW4,17412
|
|
17
|
+
datashare_python/worker-template.tar.gz,sha256=gNSDvn4Lh8iFpFk6j8nlu7pJoIDLh7SVa6EHcjXrj54,286805
|
|
18
|
+
datashare_python/worker.py,sha256=1FdmwYKWKYUKteTM3RC6kFQHR02q8NUDe91hv68QPEo,7207
|
|
18
19
|
datashare_python/cli/__init__.py,sha256=9BPWtssDgsVfWMsZ1TtZCla0EC_kai4RHttr8oNLYOE,1401
|
|
19
20
|
datashare_python/cli/project.py,sha256=w32Gy9AOL5B00uDT4in7YUCt2g68FnNbvwg2M3a8G6o,946
|
|
20
21
|
datashare_python/cli/task.py,sha256=8mvKGS21bZ14BgZ0Uo-dfameljkaI2ZBha80ywCy-E8,5822
|
|
21
22
|
datashare_python/cli/utils.py,sha256=p69CQb0zfixuyBkiZprhdMCc_NuYwXyAn6vC9H1UzAw,911
|
|
22
23
|
datashare_python/cli/worker.py,sha256=I4KTpFIpXFowioFn72Rm6LBCYlY-Dhp4NBIPvtRgUXE,5283
|
|
23
|
-
datashare_python-0.7.
|
|
24
|
-
datashare_python-0.7.
|
|
25
|
-
datashare_python-0.7.
|
|
26
|
-
datashare_python-0.7.
|
|
24
|
+
datashare_python-0.7.1.dist-info/METADATA,sha256=FF5E62UhxGi0zBseyb4b_6TQRl2JtDZlA1wLECoTblg,923
|
|
25
|
+
datashare_python-0.7.1.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
26
|
+
datashare_python-0.7.1.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
|
|
27
|
+
datashare_python-0.7.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|