datashare-python 0.6.3__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  from pathlib import Path
2
- from typing import ClassVar
2
+ from typing import Annotated, Literal
3
3
 
4
4
  from icij_common.es import ESClient
5
5
  from icij_common.pydantic_utils import ICIJSettings
@@ -18,7 +18,6 @@ import datashare_python
18
18
  from .objects import BaseModel
19
19
  from .task_client import DatashareTaskClient
20
20
  from .types_ import TemporalClient
21
- from .utils import LogWithWorkerIDMixin
22
21
 
23
22
  _ALL_LOGGERS = [datashare_python.__name__]
24
23
 
@@ -76,11 +75,20 @@ class TemporalClientConfig(BaseModel):
76
75
  return self._client
77
76
 
78
77
 
79
- class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
78
+ LogLevel = Literal["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"]
79
+
80
+
81
+ class LoggingConfig(BaseModel):
82
+ log_in_json: bool = False
83
+ loggers: dict[str, LogLevel]
84
+
85
+
86
+ class WorkerConfig(ICIJSettings, BaseModel):
80
87
  model_config = DS_WORKER_SETTINGS_CONFIG
81
88
 
82
- loggers: ClassVar[list[str]] = Field(_ALL_LOGGERS, frozen=True)
83
- log_level: str = Field(default="INFO")
89
+ logging: Annotated[LoggingConfig, Field(frozen=True)] = {
90
+ datashare_python.__name__: "INFO"
91
+ }
84
92
 
85
93
  datashare: DatashareClientConfig = DatashareClientConfig()
86
94
  elasticsearch: ESClientConfig = ESClientConfig()
@@ -12,6 +12,7 @@ from temporalio import workflow
12
12
 
13
13
  from datashare_python.config import (
14
14
  DatashareClientConfig,
15
+ LoggingConfig,
15
16
  TemporalClientConfig,
16
17
  WorkerConfig,
17
18
  )
@@ -19,10 +20,7 @@ from datashare_python.dependencies import (
19
20
  lifespan_es_client,
20
21
  lifespan_task_client,
21
22
  set_es_client,
22
- set_event_loop,
23
- set_loggers,
24
23
  set_task_client,
25
- set_temporal_client,
26
24
  with_dependencies,
27
25
  )
28
26
  from datashare_python.objects import Document, TaskState
@@ -78,13 +76,7 @@ class MockedWorkflow:
78
76
 
79
77
  @pytest.fixture(scope="session")
80
78
  def test_deps() -> list[ContextManagerFactory]:
81
- return [
82
- set_loggers,
83
- set_event_loop,
84
- set_es_client,
85
- set_temporal_client,
86
- set_task_client,
87
- ]
79
+ return [set_es_client, set_task_client]
88
80
 
89
81
 
90
82
  @pytest.fixture(scope="session")
@@ -99,8 +91,16 @@ def event_loop(
99
91
 
100
92
  @pytest.fixture(scope="session")
101
93
  def test_worker_config() -> WorkerConfig:
94
+ logging_config = LoggingConfig(
95
+ log_in_json=False,
96
+ loggers={
97
+ "datashare_python": "DEBUG",
98
+ "icij_common": "DEBUG",
99
+ "worker_template": "DEBUG",
100
+ },
101
+ )
102
102
  return WorkerConfig(
103
- log_level="DEBUG",
103
+ logging=logging_config,
104
104
  datashare=DatashareClientConfig(url="http://localhost:8080"),
105
105
  temporal=TemporalClientConfig(host="localhost:7233"),
106
106
  )
@@ -9,8 +9,9 @@ from typing import Any
9
9
 
10
10
  from icij_common.es import ESClient
11
11
 
12
- from .config import WorkerConfig
12
+ from .config import LogLevel, WorkerConfig
13
13
  from .exceptions import DependencyInjectionError
14
+ from .logging_ import setup_worker_loggers
14
15
  from .task_client import DatashareTaskClient
15
16
  from .types_ import ContextManagerFactory, TemporalClient
16
17
 
@@ -35,10 +36,13 @@ def lifespan_event_loop() -> AbstractEventLoop:
35
36
  raise DependencyInjectionError("event loop") from e
36
37
 
37
38
 
38
- def set_loggers(worker_config: WorkerConfig, worker_id: str) -> None:
39
- worker_config.setup_loggers(worker_id=worker_id)
39
+ def set_loggers(
40
+ worker_config: WorkerConfig, worker_id: str, loggers: dict[str, LogLevel]
41
+ ) -> None:
42
+ setup_worker_loggers(
43
+ loggers=loggers, worker_id=worker_id, in_json=worker_config.logging.log_in_json
44
+ )
40
45
  logger.info("worker loggers ready to log 💬")
41
- logger.info("app config: %s", worker_config.model_dump_json(indent=2))
42
46
 
43
47
 
44
48
  def set_worker_config(worker_config: WorkerConfig) -> None:
@@ -4,7 +4,7 @@ from collections.abc import Callable, Iterable
4
4
  from importlib.metadata import entry_points
5
5
 
6
6
  from .config import WorkerConfig
7
- from .dependencies import set_worker_config
7
+ from .dependencies import set_loggers, set_worker_config
8
8
  from .types_ import ContextManagerFactory
9
9
  from .utils import ActivityWithProgress
10
10
 
@@ -29,6 +29,8 @@ _Discovery = tuple[
29
29
  type[WorkerConfig],
30
30
  ]
31
31
 
32
+ _MANDATORY_DEPS = [set_worker_config, set_loggers]
33
+
32
34
 
33
35
  def discover(
34
36
  wf_names: list[str] | None, *, act_names: list[str] | None, deps_name: str | None
@@ -68,8 +70,9 @@ def discover(
68
70
  deps = []
69
71
  if deps_name is not None:
70
72
  deps = discover_dependencies(deps_name)
71
- if set_worker_config not in deps:
72
- deps.append(set_worker_config)
73
+ for mandatory in _MANDATORY_DEPS:
74
+ if mandatory not in deps:
75
+ deps.append(mandatory)
73
76
  if deps:
74
77
  n_deps = len(deps)
75
78
  discovered += "\n"
@@ -0,0 +1,87 @@
1
+ import logging
2
+ import sys
3
+ from copy import copy
4
+
5
+ from icij_common.logging_utils import (
6
+ DATE_FMT,
7
+ STREAM_HANDLER_FMT,
8
+ STREAM_HANDLER_FMT_WITH_WORKER_ID,
9
+ )
10
+ from pythonjsonlogger.core import RESERVED_ATTRS, BaseJsonFormatter
11
+ from pythonjsonlogger.orjson import OrjsonFormatter
12
+ from temporalio import activity, workflow
13
+
14
+ from .config import LogLevel
15
+
16
+ _ACT_LOGGER_ATTRS = [
17
+ "activity_type",
18
+ "activity_id",
19
+ "activity_run_id",
20
+ ]
21
+
22
+ _WF_LOGGED_ATTRS = [
23
+ "workflow_type",
24
+ "workflow_id",
25
+ "workflow_run_id",
26
+ ]
27
+ _LOGGED_ATTRIBUTES = (
28
+ copy(RESERVED_ATTRS) + _WF_LOGGED_ATTRS + _ACT_LOGGER_ATTRS + ["worker_id"]
29
+ )
30
+
31
+
32
+ def setup_worker_loggers(
33
+ loggers: dict[str, LogLevel], *, worker_id: str | None, in_json: bool
34
+ ) -> None:
35
+ worker_filter = WorkerFilter(worker_id)
36
+ for logger_name, level_str in loggers.items():
37
+ level = getattr(logging, level_str)
38
+ logger = logging.getLogger(logger_name)
39
+ logger.setLevel(level)
40
+ logger.handlers = []
41
+ for handler in _get_worker_handlers(level, worker_id, in_json=in_json):
42
+ logger.addHandler(handler)
43
+ logger.addFilter(worker_filter)
44
+
45
+
46
+ def _get_worker_handlers(
47
+ level: int, worker_id: str | None, *, in_json: bool
48
+ ) -> list[logging.Handler]:
49
+ stream_handler = logging.StreamHandler(sys.stderr)
50
+ if in_json:
51
+ fmt = _json_formatter(datefmt=DATE_FMT, worker_id=worker_id)
52
+ else:
53
+ if worker_id is not None:
54
+ fmt = STREAM_HANDLER_FMT_WITH_WORKER_ID
55
+ else:
56
+ fmt = STREAM_HANDLER_FMT
57
+ fmt = logging.Formatter(fmt, DATE_FMT)
58
+ stream_handler.setFormatter(fmt)
59
+ stream_handler.setLevel(level)
60
+ return [stream_handler]
61
+
62
+
63
+ class WorkerFilter(logging.Filter):
64
+ def __init__(self, worker_id: str) -> None:
65
+ super().__init__()
66
+ self._worker_id = worker_id
67
+
68
+ def filter(self, record: logging.LogRecord) -> bool:
69
+ record.worker_id = self._worker_id
70
+ if workflow.in_workflow():
71
+ wf_info = workflow.info()
72
+ for attr in _WF_LOGGED_ATTRS:
73
+ setattr(record, attr, getattr(wf_info, attr))
74
+ if activity.in_activity():
75
+ act_info = activity.info()
76
+ for attr in _ACT_LOGGER_ATTRS:
77
+ setattr(record, attr, getattr(act_info, attr))
78
+ return True
79
+
80
+
81
+ def _json_formatter(datefmt: str, worker_id: str) -> BaseJsonFormatter:
82
+ fmt = OrjsonFormatter( # let's keep logging as fast as possible
83
+ _LOGGED_ATTRIBUTES,
84
+ extra={"worker_id": worker_id},
85
+ datefmt=datefmt,
86
+ )
87
+ return fmt
Binary file
@@ -8,6 +8,7 @@ from asyncio import AbstractEventLoop
8
8
  from collections.abc import AsyncGenerator, Callable
9
9
  from concurrent.futures import ThreadPoolExecutor
10
10
  from contextlib import asynccontextmanager
11
+ from copy import copy
11
12
  from typing import Any
12
13
 
13
14
  from temporalio.worker import PollerBehaviorSimpleMaximum, Worker
@@ -142,12 +143,27 @@ async def worker_context(
142
143
  task_queue: str,
143
144
  dependencies: list[ContextManagerFactory] | None = None,
144
145
  ) -> AsyncGenerator[DatashareWorker, None]:
146
+ discovered = []
147
+ if activities is not None:
148
+ discovered.extend(activities)
149
+ if workflows is not None:
150
+ discovered.extend(workflows)
151
+ if dependencies is not None:
152
+ discovered.extend(dependencies)
153
+ discovered.append(worker_config)
154
+ loggers = copy(worker_config.logging.loggers)
155
+ discovered_loggers = {_get_object_package(o).__name__ for o in discovered}
156
+ for logger in discovered_loggers:
157
+ if logger not in loggers:
158
+ # Log at INFO level by default
159
+ loggers[logger] = "INFO"
145
160
  deps_cm = (
146
161
  with_dependencies(
147
162
  dependencies,
148
163
  worker_config=worker_config,
149
164
  worker_id=worker_id,
150
165
  event_loop=event_loop,
166
+ loggers=loggers,
151
167
  )
152
168
  if dependencies
153
169
  else _do_nothing_cm()
@@ -181,3 +197,9 @@ def _get_class_from_method(method: Callable) -> type:
181
197
  class_name = method.__qualname__.rsplit(".", 1)[0]
182
198
  module = sys.modules[method.__module__]
183
199
  return getattr(module, class_name)
200
+
201
+
202
+ def _get_object_package(obj: Any) -> Any:
203
+ mod = inspect.getmodule(obj)
204
+ base, _, _ = mod.__name__.partition(".")
205
+ return sys.modules[base]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datashare-python
3
- Version: 0.6.3
3
+ Version: 0.7.0
4
4
  Summary: Manage Python tasks and local resources in Datashare
5
5
  Project-URL: Homepage, https://icij.github.io/datashare-python/
6
6
  Project-URL: Documentation, https://icij.github.io/datashare-python/
@@ -8,13 +8,14 @@ Project-URL: Repository, https://github.com/ICIJ/datashare-python
8
8
  Project-URL: Issues, https://github.com/ICIJ/datashare-python/issues
9
9
  Author-email: Clément Doumouro <cdoumouro@icij.org>, Clément Doumouro <clement.doumouro@gmail.com>, Lion Summerbell <lsummerbell@icij.org>
10
10
  Requires-Python: <4,>=3.11
11
- Requires-Dist: aiohttp~=3.11.9
12
- Requires-Dist: alive-progress~=3.2.0
13
- Requires-Dist: hatchling~=1.27.0
11
+ Requires-Dist: aiohttp~=3.11
12
+ Requires-Dist: alive-progress~=3.2
13
+ Requires-Dist: hatchling~=1.27
14
14
  Requires-Dist: icij-common[elasticsearch]~=0.8.2
15
- Requires-Dist: nest-asyncio~=1.6.0
16
- Requires-Dist: python-json-logger~=4.0.0
15
+ Requires-Dist: nest-asyncio~=1.6
16
+ Requires-Dist: orjson~=3.11
17
+ Requires-Dist: python-json-logger~=4.0
17
18
  Requires-Dist: pyyaml~=6.0
18
- Requires-Dist: temporalio~=1.23.0
19
+ Requires-Dist: temporalio~=1.23
19
20
  Requires-Dist: tomlkit~=0.14.0
20
21
  Requires-Dist: typer~=0.15.4
@@ -1,25 +1,26 @@
1
1
  datashare_python/.gitignore,sha256=e-SRgnvGGdsjRrqgKsTzALz6Obx8IYiOjr0yaAxT6v8,22
2
2
  datashare_python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  datashare_python/__main__.py,sha256=g-fvS46zl9umKmGrSpl-OG-8PSuZgjqvTCqjpsZtSps,101
4
- datashare_python/config.py,sha256=Mn43zAqUH8bmVpPQ4DK42ffg5LmRtC7ZHHgBlIINlD4,3814
5
- datashare_python/conftest.py,sha256=zUEB9d3CcYVVfsuEXkvF1G5JCEchTknB6pFUiM8FbXA,8512
4
+ datashare_python/config.py,sha256=Q4iu3ZGaQB7npaHJWclxPIfgzZTf_-8VxxhXrB9nlpE,3928
5
+ datashare_python/conftest.py,sha256=_VESUGax8wt0OlmdJmDFyTJlGO14bt1ccXsjHor0XVk,8592
6
6
  datashare_python/constants.py,sha256=a8-ceZKBVMXydcoNQ35fSjFjxeJ7dt-N6eAvqtPpf9g,320
7
- datashare_python/dependencies.py,sha256=4UsVFKRjd2Q0ghg_fUU24P26tFYhg_SnAENj2mKErrY,4060
8
- datashare_python/discovery.py,sha256=UsfIb_pL56BQ5i5xvfuhOO0bDefjA_oToxrCeJQQbcU,6925
7
+ datashare_python/dependencies.py,sha256=KJuAp6Dmv8DQuFnGjbWiHu7StzZj97eBPDyZ_RfCQRc,4141
8
+ datashare_python/discovery.py,sha256=BPB_Ak6d1-vcf9vAQA63IRb2U8h83_mIIi8MbKbFzQ0,7020
9
9
  datashare_python/exceptions.py,sha256=bVHEAXxDPKfxeeMC0hJXEsrJkgsKO2ESAhxWU96GA4M,496
10
+ datashare_python/logging_.py,sha256=bhOI5ynVrTe2bU3jUNbaWXfe4zQEG_lLQxJ3Esrz9LQ,2589
10
11
  datashare_python/objects.py,sha256=pE0DGNNkl1etxz5ed7T-EaGo1o9TONjH2Lg9u1qdAWU,7571
11
12
  datashare_python/task_client.py,sha256=oTmP8bvZW0UyhLNMi1AV3XIAx7hrdbxNRss2Mw2azEc,8435
12
13
  datashare_python/template.py,sha256=RxKTYLXoS_EQ8Jc41JkBXppPdbCFqDWfP3BmC0gvB5o,4024
13
14
  datashare_python/types_.py,sha256=9Hk1XqpdXbM1TnEzwvJ5G9ABbaCZW9KgBTtiPBVn_7k,649
14
15
  datashare_python/utils.py,sha256=ZGZKO9Q4_aLVVilZUCkmHQ21M_37hVOCr7G-qZPOflU,17234
15
- datashare_python/worker-template.tar.gz,sha256=65af1_Q255I7JtQbwLj0M8PiGntHoHXjEZGhJi6hIqM,274854
16
- datashare_python/worker.py,sha256=UdSCWZw8qhkhzok89oU7J35VTDZwxRAqef-5Z8yt95A,6333
16
+ datashare_python/worker-template.tar.gz,sha256=Krs3td9sVkwXP9KPfIdTUgYWUNoU0G5YLGnn_cNxeTY,280218
17
+ datashare_python/worker.py,sha256=M_I8AL3KZbKeA7-ObaoMUxz3pHBNslJD0XChOcmyafk,7076
17
18
  datashare_python/cli/__init__.py,sha256=9BPWtssDgsVfWMsZ1TtZCla0EC_kai4RHttr8oNLYOE,1401
18
19
  datashare_python/cli/project.py,sha256=w32Gy9AOL5B00uDT4in7YUCt2g68FnNbvwg2M3a8G6o,946
19
20
  datashare_python/cli/task.py,sha256=8mvKGS21bZ14BgZ0Uo-dfameljkaI2ZBha80ywCy-E8,5822
20
21
  datashare_python/cli/utils.py,sha256=p69CQb0zfixuyBkiZprhdMCc_NuYwXyAn6vC9H1UzAw,911
21
22
  datashare_python/cli/worker.py,sha256=I4KTpFIpXFowioFn72Rm6LBCYlY-Dhp4NBIPvtRgUXE,5283
22
- datashare_python-0.6.3.dist-info/METADATA,sha256=nALxBnkp4r_0GdR4eVD7bUAS7OuheY8KA-MqksEcvKg,907
23
- datashare_python-0.6.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
24
- datashare_python-0.6.3.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
25
- datashare_python-0.6.3.dist-info/RECORD,,
23
+ datashare_python-0.7.0.dist-info/METADATA,sha256=GRZEpX-eTktdvsnSyrvyKzG9kgjqyW4tFs_FDVaEQWU,923
24
+ datashare_python-0.7.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
25
+ datashare_python-0.7.0.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
26
+ datashare_python-0.7.0.dist-info/RECORD,,