datashare-python 0.6.3__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datashare_python/config.py +13 -5
- datashare_python/conftest.py +11 -11
- datashare_python/dependencies.py +8 -4
- datashare_python/discovery.py +6 -3
- datashare_python/logging_.py +87 -0
- datashare_python/worker-template.tar.gz +0 -0
- datashare_python/worker.py +22 -0
- {datashare_python-0.6.3.dist-info → datashare_python-0.7.0.dist-info}/METADATA +8 -7
- {datashare_python-0.6.3.dist-info → datashare_python-0.7.0.dist-info}/RECORD +11 -10
- {datashare_python-0.6.3.dist-info → datashare_python-0.7.0.dist-info}/WHEEL +0 -0
- {datashare_python-0.6.3.dist-info → datashare_python-0.7.0.dist-info}/entry_points.txt +0 -0
datashare_python/config.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Annotated, Literal
|
|
3
3
|
|
|
4
4
|
from icij_common.es import ESClient
|
|
5
5
|
from icij_common.pydantic_utils import ICIJSettings
|
|
@@ -18,7 +18,6 @@ import datashare_python
|
|
|
18
18
|
from .objects import BaseModel
|
|
19
19
|
from .task_client import DatashareTaskClient
|
|
20
20
|
from .types_ import TemporalClient
|
|
21
|
-
from .utils import LogWithWorkerIDMixin
|
|
22
21
|
|
|
23
22
|
_ALL_LOGGERS = [datashare_python.__name__]
|
|
24
23
|
|
|
@@ -76,11 +75,20 @@ class TemporalClientConfig(BaseModel):
|
|
|
76
75
|
return self._client
|
|
77
76
|
|
|
78
77
|
|
|
79
|
-
|
|
78
|
+
LogLevel = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
class LoggingConfig(BaseModel):
|
|
82
|
+
log_in_json: bool = False
|
|
83
|
+
loggers: dict[str, LogLevel]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class WorkerConfig(ICIJSettings, BaseModel):
|
|
80
87
|
model_config = DS_WORKER_SETTINGS_CONFIG
|
|
81
88
|
|
|
82
|
-
|
|
83
|
-
|
|
89
|
+
logging: Annotated[LoggingConfig, Field(frozen=True)] = {
|
|
90
|
+
datashare_python.__name__: "INFO"
|
|
91
|
+
}
|
|
84
92
|
|
|
85
93
|
datashare: DatashareClientConfig = DatashareClientConfig()
|
|
86
94
|
elasticsearch: ESClientConfig = ESClientConfig()
|
datashare_python/conftest.py
CHANGED
|
@@ -12,6 +12,7 @@ from temporalio import workflow
|
|
|
12
12
|
|
|
13
13
|
from datashare_python.config import (
|
|
14
14
|
DatashareClientConfig,
|
|
15
|
+
LoggingConfig,
|
|
15
16
|
TemporalClientConfig,
|
|
16
17
|
WorkerConfig,
|
|
17
18
|
)
|
|
@@ -19,10 +20,7 @@ from datashare_python.dependencies import (
|
|
|
19
20
|
lifespan_es_client,
|
|
20
21
|
lifespan_task_client,
|
|
21
22
|
set_es_client,
|
|
22
|
-
set_event_loop,
|
|
23
|
-
set_loggers,
|
|
24
23
|
set_task_client,
|
|
25
|
-
set_temporal_client,
|
|
26
24
|
with_dependencies,
|
|
27
25
|
)
|
|
28
26
|
from datashare_python.objects import Document, TaskState
|
|
@@ -78,13 +76,7 @@ class MockedWorkflow:
|
|
|
78
76
|
|
|
79
77
|
@pytest.fixture(scope="session")
|
|
80
78
|
def test_deps() -> list[ContextManagerFactory]:
|
|
81
|
-
return [
|
|
82
|
-
set_loggers,
|
|
83
|
-
set_event_loop,
|
|
84
|
-
set_es_client,
|
|
85
|
-
set_temporal_client,
|
|
86
|
-
set_task_client,
|
|
87
|
-
]
|
|
79
|
+
return [set_es_client, set_task_client]
|
|
88
80
|
|
|
89
81
|
|
|
90
82
|
@pytest.fixture(scope="session")
|
|
@@ -99,8 +91,16 @@ def event_loop(
|
|
|
99
91
|
|
|
100
92
|
@pytest.fixture(scope="session")
|
|
101
93
|
def test_worker_config() -> WorkerConfig:
|
|
94
|
+
logging_config = LoggingConfig(
|
|
95
|
+
log_in_json=False,
|
|
96
|
+
loggers={
|
|
97
|
+
"datashare_python": "DEBUG",
|
|
98
|
+
"icij_common": "DEBUG",
|
|
99
|
+
"worker_template": "DEBUG",
|
|
100
|
+
},
|
|
101
|
+
)
|
|
102
102
|
return WorkerConfig(
|
|
103
|
-
|
|
103
|
+
logging=logging_config,
|
|
104
104
|
datashare=DatashareClientConfig(url="http://localhost:8080"),
|
|
105
105
|
temporal=TemporalClientConfig(host="localhost:7233"),
|
|
106
106
|
)
|
datashare_python/dependencies.py
CHANGED
|
@@ -9,8 +9,9 @@ from typing import Any
|
|
|
9
9
|
|
|
10
10
|
from icij_common.es import ESClient
|
|
11
11
|
|
|
12
|
-
from .config import WorkerConfig
|
|
12
|
+
from .config import LogLevel, WorkerConfig
|
|
13
13
|
from .exceptions import DependencyInjectionError
|
|
14
|
+
from .logging_ import setup_worker_loggers
|
|
14
15
|
from .task_client import DatashareTaskClient
|
|
15
16
|
from .types_ import ContextManagerFactory, TemporalClient
|
|
16
17
|
|
|
@@ -35,10 +36,13 @@ def lifespan_event_loop() -> AbstractEventLoop:
|
|
|
35
36
|
raise DependencyInjectionError("event loop") from e
|
|
36
37
|
|
|
37
38
|
|
|
38
|
-
def set_loggers(
|
|
39
|
-
worker_config
|
|
39
|
+
def set_loggers(
|
|
40
|
+
worker_config: WorkerConfig, worker_id: str, loggers: dict[str, LogLevel]
|
|
41
|
+
) -> None:
|
|
42
|
+
setup_worker_loggers(
|
|
43
|
+
loggers=loggers, worker_id=worker_id, in_json=worker_config.logging.log_in_json
|
|
44
|
+
)
|
|
40
45
|
logger.info("worker loggers ready to log 💬")
|
|
41
|
-
logger.info("app config: %s", worker_config.model_dump_json(indent=2))
|
|
42
46
|
|
|
43
47
|
|
|
44
48
|
def set_worker_config(worker_config: WorkerConfig) -> None:
|
datashare_python/discovery.py
CHANGED
|
@@ -4,7 +4,7 @@ from collections.abc import Callable, Iterable
|
|
|
4
4
|
from importlib.metadata import entry_points
|
|
5
5
|
|
|
6
6
|
from .config import WorkerConfig
|
|
7
|
-
from .dependencies import set_worker_config
|
|
7
|
+
from .dependencies import set_loggers, set_worker_config
|
|
8
8
|
from .types_ import ContextManagerFactory
|
|
9
9
|
from .utils import ActivityWithProgress
|
|
10
10
|
|
|
@@ -29,6 +29,8 @@ _Discovery = tuple[
|
|
|
29
29
|
type[WorkerConfig],
|
|
30
30
|
]
|
|
31
31
|
|
|
32
|
+
_MANDATORY_DEPS = [set_worker_config, set_loggers]
|
|
33
|
+
|
|
32
34
|
|
|
33
35
|
def discover(
|
|
34
36
|
wf_names: list[str] | None, *, act_names: list[str] | None, deps_name: str | None
|
|
@@ -68,8 +70,9 @@ def discover(
|
|
|
68
70
|
deps = []
|
|
69
71
|
if deps_name is not None:
|
|
70
72
|
deps = discover_dependencies(deps_name)
|
|
71
|
-
|
|
72
|
-
deps
|
|
73
|
+
for mandatory in _MANDATORY_DEPS:
|
|
74
|
+
if mandatory not in deps:
|
|
75
|
+
deps.append(mandatory)
|
|
73
76
|
if deps:
|
|
74
77
|
n_deps = len(deps)
|
|
75
78
|
discovered += "\n"
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
from copy import copy
|
|
4
|
+
|
|
5
|
+
from icij_common.logging_utils import (
|
|
6
|
+
DATE_FMT,
|
|
7
|
+
STREAM_HANDLER_FMT,
|
|
8
|
+
STREAM_HANDLER_FMT_WITH_WORKER_ID,
|
|
9
|
+
)
|
|
10
|
+
from pythonjsonlogger.core import RESERVED_ATTRS, BaseJsonFormatter
|
|
11
|
+
from pythonjsonlogger.orjson import OrjsonFormatter
|
|
12
|
+
from temporalio import activity, workflow
|
|
13
|
+
|
|
14
|
+
from .config import LogLevel
|
|
15
|
+
|
|
16
|
+
_ACT_LOGGER_ATTRS = [
|
|
17
|
+
"activity_type",
|
|
18
|
+
"activity_id",
|
|
19
|
+
"activity_run_id",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
_WF_LOGGED_ATTRS = [
|
|
23
|
+
"workflow_type",
|
|
24
|
+
"workflow_id",
|
|
25
|
+
"workflow_run_id",
|
|
26
|
+
]
|
|
27
|
+
_LOGGED_ATTRIBUTES = (
|
|
28
|
+
copy(RESERVED_ATTRS) + _WF_LOGGED_ATTRS + _ACT_LOGGER_ATTRS + ["worker_id"]
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def setup_worker_loggers(
|
|
33
|
+
loggers: dict[str, LogLevel], *, worker_id: str | None, in_json: bool
|
|
34
|
+
) -> None:
|
|
35
|
+
worker_filter = WorkerFilter(worker_id)
|
|
36
|
+
for logger_name, level_str in loggers.items():
|
|
37
|
+
level = getattr(logging, level_str)
|
|
38
|
+
logger = logging.getLogger(logger_name)
|
|
39
|
+
logger.setLevel(level)
|
|
40
|
+
logger.handlers = []
|
|
41
|
+
for handler in _get_worker_handlers(level, worker_id, in_json=in_json):
|
|
42
|
+
logger.addHandler(handler)
|
|
43
|
+
logger.addFilter(worker_filter)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _get_worker_handlers(
|
|
47
|
+
level: int, worker_id: str | None, *, in_json: bool
|
|
48
|
+
) -> list[logging.Handler]:
|
|
49
|
+
stream_handler = logging.StreamHandler(sys.stderr)
|
|
50
|
+
if in_json:
|
|
51
|
+
fmt = _json_formatter(datefmt=DATE_FMT, worker_id=worker_id)
|
|
52
|
+
else:
|
|
53
|
+
if worker_id is not None:
|
|
54
|
+
fmt = STREAM_HANDLER_FMT_WITH_WORKER_ID
|
|
55
|
+
else:
|
|
56
|
+
fmt = STREAM_HANDLER_FMT
|
|
57
|
+
fmt = logging.Formatter(fmt, DATE_FMT)
|
|
58
|
+
stream_handler.setFormatter(fmt)
|
|
59
|
+
stream_handler.setLevel(level)
|
|
60
|
+
return [stream_handler]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class WorkerFilter(logging.Filter):
|
|
64
|
+
def __init__(self, worker_id: str) -> None:
|
|
65
|
+
super().__init__()
|
|
66
|
+
self._worker_id = worker_id
|
|
67
|
+
|
|
68
|
+
def filter(self, record: logging.LogRecord) -> bool:
|
|
69
|
+
record.worker_id = self._worker_id
|
|
70
|
+
if workflow.in_workflow():
|
|
71
|
+
wf_info = workflow.info()
|
|
72
|
+
for attr in _WF_LOGGED_ATTRS:
|
|
73
|
+
setattr(record, attr, getattr(wf_info, attr))
|
|
74
|
+
if activity.in_activity():
|
|
75
|
+
act_info = activity.info()
|
|
76
|
+
for attr in _ACT_LOGGER_ATTRS:
|
|
77
|
+
setattr(record, attr, getattr(act_info, attr))
|
|
78
|
+
return True
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _json_formatter(datefmt: str, worker_id: str) -> BaseJsonFormatter:
|
|
82
|
+
fmt = OrjsonFormatter( # let's keep logging as fast as possible
|
|
83
|
+
_LOGGED_ATTRIBUTES,
|
|
84
|
+
extra={"worker_id": worker_id},
|
|
85
|
+
datefmt=datefmt,
|
|
86
|
+
)
|
|
87
|
+
return fmt
|
|
Binary file
|
datashare_python/worker.py
CHANGED
|
@@ -8,6 +8,7 @@ from asyncio import AbstractEventLoop
|
|
|
8
8
|
from collections.abc import AsyncGenerator, Callable
|
|
9
9
|
from concurrent.futures import ThreadPoolExecutor
|
|
10
10
|
from contextlib import asynccontextmanager
|
|
11
|
+
from copy import copy
|
|
11
12
|
from typing import Any
|
|
12
13
|
|
|
13
14
|
from temporalio.worker import PollerBehaviorSimpleMaximum, Worker
|
|
@@ -142,12 +143,27 @@ async def worker_context(
|
|
|
142
143
|
task_queue: str,
|
|
143
144
|
dependencies: list[ContextManagerFactory] | None = None,
|
|
144
145
|
) -> AsyncGenerator[DatashareWorker, None]:
|
|
146
|
+
discovered = []
|
|
147
|
+
if activities is not None:
|
|
148
|
+
discovered.extend(activities)
|
|
149
|
+
if workflows is not None:
|
|
150
|
+
discovered.extend(workflows)
|
|
151
|
+
if dependencies is not None:
|
|
152
|
+
discovered.extend(dependencies)
|
|
153
|
+
discovered.append(worker_config)
|
|
154
|
+
loggers = copy(worker_config.logging.loggers)
|
|
155
|
+
discovered_loggers = {_get_object_package(o).__name__ for o in discovered}
|
|
156
|
+
for logger in discovered_loggers:
|
|
157
|
+
if logger not in loggers:
|
|
158
|
+
# Log in info by default
|
|
159
|
+
loggers[logger] = "INFO"
|
|
145
160
|
deps_cm = (
|
|
146
161
|
with_dependencies(
|
|
147
162
|
dependencies,
|
|
148
163
|
worker_config=worker_config,
|
|
149
164
|
worker_id=worker_id,
|
|
150
165
|
event_loop=event_loop,
|
|
166
|
+
loggers=loggers,
|
|
151
167
|
)
|
|
152
168
|
if dependencies
|
|
153
169
|
else _do_nothing_cm()
|
|
@@ -181,3 +197,9 @@ def _get_class_from_method(method: Callable) -> type:
|
|
|
181
197
|
class_name = method.__qualname__.rsplit(".", 1)[0]
|
|
182
198
|
module = sys.modules[method.__module__]
|
|
183
199
|
return getattr(module, class_name)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _get_object_package(obj: Any) -> Any:
|
|
203
|
+
mod = inspect.getmodule(obj)
|
|
204
|
+
base, _, _ = mod.__name__.partition(".")
|
|
205
|
+
return sys.modules[base]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datashare-python
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Manage Python tasks and local resources in Datashare
|
|
5
5
|
Project-URL: Homepage, https://icij.github.io/datashare-python/
|
|
6
6
|
Project-URL: Documentation, https://icij.github.io/datashare-python/
|
|
@@ -8,13 +8,14 @@ Project-URL: Repository, https://github.com/ICIJ/datashare-python
|
|
|
8
8
|
Project-URL: Issues, https://github.com/ICIJ/datashare-python/issues
|
|
9
9
|
Author-email: Clément Doumouro <cdoumouro@icij.org>, Clément Doumouro <clement.doumouro@gmail.com>, Lion Summerbell <lsummerbell@icij.org>
|
|
10
10
|
Requires-Python: <4,>=3.11
|
|
11
|
-
Requires-Dist: aiohttp~=3.11
|
|
12
|
-
Requires-Dist: alive-progress~=3.2
|
|
13
|
-
Requires-Dist: hatchling~=1.27
|
|
11
|
+
Requires-Dist: aiohttp~=3.11
|
|
12
|
+
Requires-Dist: alive-progress~=3.2
|
|
13
|
+
Requires-Dist: hatchling~=1.27
|
|
14
14
|
Requires-Dist: icij-common[elasticsearch]~=0.8.2
|
|
15
|
-
Requires-Dist: nest-asyncio~=1.6
|
|
16
|
-
Requires-Dist:
|
|
15
|
+
Requires-Dist: nest-asyncio~=1.6
|
|
16
|
+
Requires-Dist: orjson~=3.11
|
|
17
|
+
Requires-Dist: python-json-logger~=4.0
|
|
17
18
|
Requires-Dist: pyyaml~=6.0
|
|
18
|
-
Requires-Dist: temporalio~=1.23
|
|
19
|
+
Requires-Dist: temporalio~=1.23
|
|
19
20
|
Requires-Dist: tomlkit~=0.14.0
|
|
20
21
|
Requires-Dist: typer~=0.15.4
|
|
@@ -1,25 +1,26 @@
|
|
|
1
1
|
datashare_python/.gitignore,sha256=e-SRgnvGGdsjRrqgKsTzALz6Obx8IYiOjr0yaAxT6v8,22
|
|
2
2
|
datashare_python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
datashare_python/__main__.py,sha256=g-fvS46zl9umKmGrSpl-OG-8PSuZgjqvTCqjpsZtSps,101
|
|
4
|
-
datashare_python/config.py,sha256=
|
|
5
|
-
datashare_python/conftest.py,sha256=
|
|
4
|
+
datashare_python/config.py,sha256=Q4iu3ZGaQB7npaHJWclxPIfgzZTf_-8VxxhXrB9nlpE,3928
|
|
5
|
+
datashare_python/conftest.py,sha256=_VESUGax8wt0OlmdJmDFyTJlGO14bt1ccXsjHor0XVk,8592
|
|
6
6
|
datashare_python/constants.py,sha256=a8-ceZKBVMXydcoNQ35fSjFjxeJ7dt-N6eAvqtPpf9g,320
|
|
7
|
-
datashare_python/dependencies.py,sha256=
|
|
8
|
-
datashare_python/discovery.py,sha256=
|
|
7
|
+
datashare_python/dependencies.py,sha256=KJuAp6Dmv8DQuFnGjbWiHu7StzZj97eBPDyZ_RfCQRc,4141
|
|
8
|
+
datashare_python/discovery.py,sha256=BPB_Ak6d1-vcf9vAQA63IRb2U8h83_mIIi8MbKbFzQ0,7020
|
|
9
9
|
datashare_python/exceptions.py,sha256=bVHEAXxDPKfxeeMC0hJXEsrJkgsKO2ESAhxWU96GA4M,496
|
|
10
|
+
datashare_python/logging_.py,sha256=bhOI5ynVrTe2bU3jUNbaWXfe4zQEG_lLQxJ3Esrz9LQ,2589
|
|
10
11
|
datashare_python/objects.py,sha256=pE0DGNNkl1etxz5ed7T-EaGo1o9TONjH2Lg9u1qdAWU,7571
|
|
11
12
|
datashare_python/task_client.py,sha256=oTmP8bvZW0UyhLNMi1AV3XIAx7hrdbxNRss2Mw2azEc,8435
|
|
12
13
|
datashare_python/template.py,sha256=RxKTYLXoS_EQ8Jc41JkBXppPdbCFqDWfP3BmC0gvB5o,4024
|
|
13
14
|
datashare_python/types_.py,sha256=9Hk1XqpdXbM1TnEzwvJ5G9ABbaCZW9KgBTtiPBVn_7k,649
|
|
14
15
|
datashare_python/utils.py,sha256=ZGZKO9Q4_aLVVilZUCkmHQ21M_37hVOCr7G-qZPOflU,17234
|
|
15
|
-
datashare_python/worker-template.tar.gz,sha256=
|
|
16
|
-
datashare_python/worker.py,sha256=
|
|
16
|
+
datashare_python/worker-template.tar.gz,sha256=Krs3td9sVkwXP9KPfIdTUgYWUNoU0G5YLGnn_cNxeTY,280218
|
|
17
|
+
datashare_python/worker.py,sha256=M_I8AL3KZbKeA7-ObaoMUxz3pHBNslJD0XChOcmyafk,7076
|
|
17
18
|
datashare_python/cli/__init__.py,sha256=9BPWtssDgsVfWMsZ1TtZCla0EC_kai4RHttr8oNLYOE,1401
|
|
18
19
|
datashare_python/cli/project.py,sha256=w32Gy9AOL5B00uDT4in7YUCt2g68FnNbvwg2M3a8G6o,946
|
|
19
20
|
datashare_python/cli/task.py,sha256=8mvKGS21bZ14BgZ0Uo-dfameljkaI2ZBha80ywCy-E8,5822
|
|
20
21
|
datashare_python/cli/utils.py,sha256=p69CQb0zfixuyBkiZprhdMCc_NuYwXyAn6vC9H1UzAw,911
|
|
21
22
|
datashare_python/cli/worker.py,sha256=I4KTpFIpXFowioFn72Rm6LBCYlY-Dhp4NBIPvtRgUXE,5283
|
|
22
|
-
datashare_python-0.
|
|
23
|
-
datashare_python-0.
|
|
24
|
-
datashare_python-0.
|
|
25
|
-
datashare_python-0.
|
|
23
|
+
datashare_python-0.7.0.dist-info/METADATA,sha256=GRZEpX-eTktdvsnSyrvyKzG9kgjqyW4tFs_FDVaEQWU,923
|
|
24
|
+
datashare_python-0.7.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
25
|
+
datashare_python-0.7.0.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
|
|
26
|
+
datashare_python-0.7.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|