datashare-python 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,9 +7,9 @@ import typer
7
7
  import yaml
8
8
  from icij_common.pydantic_utils import safe_copy
9
9
 
10
- from datashare_python.config import TemporalClientConfig, WorkerConfig
10
+ from datashare_python.config import TemporalClientConfig
11
11
  from datashare_python.discovery import discover, discover_activities, discover_workflows
12
- from datashare_python.worker import bootstrap_worker, create_worker_id
12
+ from datashare_python.worker import create_worker_id, worker_context
13
13
 
14
14
  from .utils import AsyncTyper
15
15
 
@@ -24,6 +24,7 @@ _LIST_ACTIVITY_NAMES_HELP = "activity names filters (supports regexes)"
24
24
  _START_WORKER_WORKFLOWS_HELP = "workflow names run by the worker (supports regexes)"
25
25
  _START_WORKER_ACTIVITIES_HELP = "activity names run by the worker (supports regexes)"
26
26
  _START_WORKER_DEPS_HELP = "worker lifetime dependencies name in the registry"
27
+ _START_WORKER_CONFIG_HELP = "worker config class key the worker configs registry"
27
28
  _START_WORKER_WORKER_ID_PREFIX_HELP = "worker ID prefix"
28
29
  _START_WORKER_CONFIG_PATH_HELP = (
29
30
  "path to a worker config YAML file,"
@@ -85,6 +86,9 @@ async def start(
85
86
  dependencies: Annotated[
86
87
  str | None, typer.Option(help=_START_WORKER_DEPS_HELP)
87
88
  ] = None,
89
+ worker_config_name: Annotated[
90
+ str | None, typer.Option(help=_START_WORKER_CONFIG_HELP)
91
+ ] = None,
88
92
  config_path: Annotated[
89
93
  Path | None,
90
94
  typer.Option(
@@ -102,19 +106,25 @@ async def start(
102
106
  typer.Option("--temporal-namespace", "-ns", help=_TEMPORAL_NAMESPACE_HELP),
103
107
  ] = None,
104
108
  ) -> None:
109
+ registered_wfs, registered_acts, registered_deps, worker_config_cls = discover(
110
+ workflows,
111
+ act_names=activities,
112
+ deps_name=dependencies,
113
+ worker_config_name=worker_config_name,
114
+ )
105
115
  if config_path is not None:
106
116
  with config_path.open() as f:
107
- bootstrap_config = WorkerConfig.model_validate(
117
+ worker_config = worker_config_cls.model_validate(
108
118
  yaml.load(f, Loader=yaml.Loader)
109
119
  )
110
120
  else:
111
- bootstrap_config = WorkerConfig()
121
+ worker_config = worker_config_cls()
112
122
  worker_id = create_worker_id(worker_id_prefix or "worker")
113
123
  logger.info(
114
124
  "starting worker %s on queue %s, with config: %s",
115
125
  worker_id,
116
126
  queue,
117
- bootstrap_config.model_dump_json(indent=2),
127
+ worker_config.model_dump_json(indent=2),
118
128
  )
119
129
  temporal_override = dict()
120
130
  if temporal_address is not None:
@@ -124,24 +134,18 @@ async def start(
124
134
  if temporal_override:
125
135
  temporal_config = TemporalClientConfig(**temporal_override)
126
136
  update = {"temporal": temporal_config}
127
- bootstrap_config = safe_copy(bootstrap_config, update=update)
128
- registered_wfs, registered_acts, registered_deps = discover(
129
- workflows, act_names=activities, deps_name=dependencies
130
- )
131
- client = await bootstrap_config.to_temporal_client()
137
+ worker_config = safe_copy(worker_config, update=update)
138
+ client = await worker_config.to_temporal_client()
132
139
  event_loop = asyncio.get_event_loop()
133
- async with bootstrap_worker(
140
+ worker_ctx = worker_context(
134
141
  worker_id,
135
142
  activities=registered_acts,
136
143
  workflows=registered_wfs,
137
144
  dependencies=registered_deps,
138
- bootstrap_config=bootstrap_config,
145
+ worker_config=worker_config,
139
146
  client=client,
140
147
  event_loop=event_loop,
141
148
  task_queue=queue,
142
- ) as worker:
143
- try:
144
- await worker.run()
145
- except Exception as e: # noqa: BLE001
146
- await worker.shutdown()
147
- raise e
149
+ )
150
+ async with worker_ctx as worker:
151
+ await worker.is_done()
@@ -74,10 +74,6 @@ class TemporalClientConfig(BaseModel):
74
74
  )
75
75
  return self._client
76
76
 
77
- # For the lru_cache
78
- def __hash__(self) -> int:
79
- return id(self)
80
-
81
77
 
82
78
  class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
83
79
  model_config = DS_WORKER_SETTINGS_CONFIG
@@ -17,7 +17,6 @@ from datashare_python.config import (
17
17
  from datashare_python.dependencies import (
18
18
  lifespan_es_client,
19
19
  lifespan_task_client,
20
- lifespan_temporal_client,
21
20
  set_es_client,
22
21
  set_event_loop,
23
22
  set_loggers,
@@ -170,10 +169,11 @@ async def test_task_client(
170
169
 
171
170
 
172
171
  @pytest.fixture(scope="session")
173
- def test_temporal_client_session(
174
- worker_lifetime_deps, # noqa: ANN001, ARG001
175
- ) -> TemporalClient:
176
- return lifespan_temporal_client()
172
+ async def test_temporal_client_session(
173
+ test_worker_config: WorkerConfig,
174
+ event_loop: AbstractEventLoop, # noqa: ARG001
175
+ ) -> TemporalClient: # noqa: ANN001
176
+ return await test_worker_config.to_temporal_client()
177
177
 
178
178
 
179
179
  @pytest.fixture
@@ -21,6 +21,7 @@ EVENT_LOOP: ContextVar[AbstractEventLoop] = ContextVar("event_loop")
21
21
  ES_CLIENT: ContextVar[ESClient] = ContextVar("es_client")
22
22
  TASK_CLIENT: ContextVar[DatashareTaskClient] = ContextVar("task_client")
23
23
  TEMPORAL_CLIENT: ContextVar[TemporalClient] = ContextVar("temporal_client")
24
+ WORKER_CONFIG: ContextVar[WorkerConfig] = ContextVar("worker_config")
24
25
 
25
26
 
26
27
  def set_event_loop(event_loop: AbstractEventLoop) -> None:
@@ -40,6 +41,17 @@ def set_loggers(worker_config: WorkerConfig, worker_id: str) -> None:
40
41
  logger.info("app config: %s", worker_config.model_dump_json(indent=2))
41
42
 
42
43
 
44
+ def set_worker_config(worker_config: WorkerConfig) -> None:
45
+ WORKER_CONFIG.set(worker_config)
46
+
47
+
48
+ def lifespan_worker_config() -> WorkerConfig:
49
+ try:
50
+ return WORKER_CONFIG.get()
51
+ except LookupError as e:
52
+ raise DependencyInjectionError("worker config") from e
53
+
54
+
43
55
  async def set_es_client(worker_config: WorkerConfig) -> ESClient:
44
56
  client = worker_config.to_es_client()
45
57
  ES_CLIENT.set(client)
@@ -3,6 +3,8 @@ import re
3
3
  from collections.abc import Callable, Iterable
4
4
  from importlib.metadata import entry_points
5
5
 
6
+ from .config import WorkerConfig
7
+ from .dependencies import set_worker_config
6
8
  from .types_ import ContextManagerFactory
7
9
  from .utils import ActivityWithProgress
8
10
 
@@ -11,9 +13,11 @@ logger = logging.getLogger(__name__)
11
13
  Activity = ActivityWithProgress | Callable | type
12
14
 
13
15
  _DEPENDENCIES = "dependencies"
14
- _WORKFLOW_GROUPS = "datashare.workflows"
15
- _ACTIVITIES_GROUPS = "datashare.activities"
16
- _DEPENDENCIES_GROUPS = "datashare.dependencies"
16
+ _WORKER_CONFIGS = "worker_configs"
17
+ _WORKFLOW_GROUP = "datashare.workflows"
18
+ _ACTIVITIES_GROUP = "datashare.activities"
19
+ _DEPENDENCIES_GROUP = "datashare.dependencies"
20
+ _WORKER_CONFIGS_GROUP = "datashare.worker_configs"
17
21
 
18
22
  _RegisteredWorkflow = tuple[str, type]
19
23
  _RegisteredActivity = tuple[str, Activity]
@@ -22,11 +26,16 @@ _Discovery = tuple[
22
26
  Iterable[_RegisteredWorkflow] | None,
23
27
  Iterable[_RegisteredActivity] | None,
24
28
  _Dependencies | None,
29
+ type[WorkerConfig],
25
30
  ]
26
31
 
27
32
 
28
33
  def discover(
29
- wf_names: list[str] | None, *, act_names: list[str] | None, deps_name: str | None
34
+ wf_names: list[str] | None,
35
+ *,
36
+ act_names: list[str] | None,
37
+ deps_name: str | None,
38
+ worker_config_name: str | None,
30
39
  ) -> _Discovery:
31
40
  discovered = ""
32
41
  wfs = None
@@ -58,11 +67,13 @@ def discover(
58
67
  if wf_names:
59
68
  msg += "workflow patterns " + ", ".join(wf_names) + " "
60
69
  if act_names:
61
- msg = "activity patterns " + ", ".join(act_names)
70
+ msg += "activity patterns " + ", ".join(act_names)
62
71
  raise ValueError(msg)
63
- deps = None
72
+ deps = []
64
73
  if deps_name is not None:
65
74
  deps = discover_dependencies(deps_name)
75
+ if set_worker_config not in deps:
76
+ deps.append(set_worker_config)
66
77
  if deps:
67
78
  n_deps = len(deps)
68
79
  discovered += "\n"
@@ -71,13 +82,18 @@ def discover(
71
82
  f"- {n_deps} dependenc{'ies' if n_deps > 1 else 'y'}:"
72
83
  f" {', '.join(deps_names)}"
73
84
  )
85
+ if worker_config_name is not None:
86
+ worker_config_cls = discover_worker_configs(worker_config_name)
87
+ discovered += f"- worker config class: {worker_config_cls}"
88
+ else:
89
+ worker_config_cls = WorkerConfig
74
90
  logger.info("discovered:\n%s", discovered)
75
- return wfs, acts, deps
91
+ return wfs, acts, deps, worker_config_cls
76
92
 
77
93
 
78
94
  def discover_workflows(names: list[str]) -> list[_RegisteredWorkflow]:
79
95
  pattern = None if not names else re.compile(rf"^{'|'.join(names)}$")
80
- impls = entry_points(group=_WORKFLOW_GROUPS)
96
+ impls = entry_points(group=_WORKFLOW_GROUP)
81
97
  registered = []
82
98
  for wf_impls in impls:
83
99
  wf_impls = wf_impls.load() # noqa: PLW2901
@@ -93,7 +109,7 @@ def discover_workflows(names: list[str]) -> list[_RegisteredWorkflow]:
93
109
 
94
110
  def discover_activities(names: list[str]) -> list[_RegisteredActivity]:
95
111
  pattern = None if not names else re.compile(rf"^{'|'.join(names)}$")
96
- impls = entry_points(group=_ACTIVITIES_GROUPS)
112
+ impls = entry_points(group=_ACTIVITIES_GROUP)
97
113
  registered = []
98
114
  for act_impls in impls:
99
115
  act_impls = act_impls.load() # noqa: PLW2901
@@ -108,9 +124,9 @@ def discover_activities(names: list[str]) -> list[_RegisteredActivity]:
108
124
 
109
125
 
110
126
  def discover_dependencies(name: str) -> _Dependencies:
111
- impls = entry_points(name=_DEPENDENCIES, group=_DEPENDENCIES_GROUPS)
127
+ impls = entry_points(name=_DEPENDENCIES, group=_DEPENDENCIES_GROUP)
112
128
  if not impls:
113
- available_impls = entry_points(group=_DEPENDENCIES_GROUPS)
129
+ available_impls = entry_points(group=_DEPENDENCIES_GROUP)
114
130
  msg = (
115
131
  f'failed to find dependency: "{name}", '
116
132
  f"available dependencies: {available_impls}"
@@ -131,6 +147,30 @@ def discover_dependencies(name: str) -> _Dependencies:
131
147
  raise LookupError(msg) from e
132
148
 
133
149
 
150
+ def discover_worker_configs(name: str) -> type[WorkerConfig]:
151
+ impls = entry_points(name=_WORKER_CONFIGS, group=_WORKER_CONFIGS_GROUP)
152
+ if not impls:
153
+ available_impls = entry_points(group=_WORKER_CONFIGS_GROUP)
154
+ msg = (
155
+ f'failed to find worker config: "{name}", '
156
+ f"available dependencies: {available_impls}"
157
+ )
158
+ raise LookupError(msg)
159
+ if len(impls) > 1:
160
+ msg = f'found multiple worker configs for name "{name}": {impls}'
161
+ raise ValueError(msg)
162
+ deps_registry = impls[_WORKER_CONFIGS].load()
163
+ try:
164
+ return deps_registry[name]
165
+ except KeyError as e:
166
+ available = list(deps_registry)
167
+ msg = (
168
+ f'failed to find worker config for name "{name}", available worker '
169
+ f"configs: {available}"
170
+ )
171
+ raise LookupError(msg) from e
172
+
173
+
134
174
  def _parse_wf_name(wf_type: type) -> str:
135
175
  if not isinstance(wf_type, type):
136
176
  msg = (
Binary file
@@ -43,6 +43,13 @@ documentation for more details:
43
43
  _ACTIVITY_THREAD_NAME_PREFIX = "datashare-activity-worker-"
44
44
 
45
45
 
46
+ class DatashareWorker(Worker):
47
+ async def is_done(self) -> None:
48
+ if self._async_context_run_task is None:
49
+ raise ValueError("worker is not running")
50
+ await self._async_context_run_task
51
+
52
+
46
53
  def datashare_worker(
47
54
  client: TemporalClient,
48
55
  worker_id: str,
@@ -53,7 +60,7 @@ def datashare_worker(
53
60
  # Scale horizontally be default for activities, each worker processes one activity
54
61
  # at a time
55
62
  max_concurrent_io_activities: int = 10,
56
- ) -> Worker:
63
+ ) -> DatashareWorker:
57
64
  if workflows is None:
58
65
  workflows = []
59
66
  if activities is None:
@@ -77,7 +84,7 @@ def datashare_worker(
77
84
  if workflows:
78
85
  logger.warning(_SEPARATE_IO_AND_CPU_WORKERS)
79
86
 
80
- return Worker(
87
+ return DatashareWorker(
81
88
  client,
82
89
  identity=worker_id,
83
90
  workflows=workflows,
@@ -124,21 +131,21 @@ def init_activity(
124
131
 
125
132
 
126
133
  @asynccontextmanager
127
- async def bootstrap_worker(
134
+ async def worker_context(
128
135
  worker_id: str,
129
136
  *,
130
137
  activities: list[Callable[..., Any] | None] | None = None,
131
138
  workflows: list[type] | None = None,
132
- bootstrap_config: WorkerConfig,
139
+ worker_config: WorkerConfig,
133
140
  client: TemporalClient,
134
141
  event_loop: AbstractEventLoop,
135
142
  task_queue: str,
136
143
  dependencies: list[ContextManagerFactory] | None = None,
137
- ) -> AsyncGenerator[Worker, None]:
144
+ ) -> AsyncGenerator[DatashareWorker, None]:
138
145
  deps_cm = (
139
146
  with_dependencies(
140
147
  dependencies,
141
- worker_config=bootstrap_config,
148
+ worker_config=worker_config,
142
149
  worker_id=worker_id,
143
150
  event_loop=event_loop,
144
151
  )
@@ -159,9 +166,10 @@ async def bootstrap_worker(
159
166
  workflows=workflows,
160
167
  activities=acts,
161
168
  task_queue=task_queue,
162
- max_concurrent_io_activities=bootstrap_config.max_concurrent_io_activities,
169
+ max_concurrent_io_activities=worker_config.max_concurrent_io_activities,
163
170
  )
164
- yield worker
171
+ async with worker:
172
+ yield worker
165
173
 
166
174
 
167
175
  @asynccontextmanager
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datashare-python
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Manage Python tasks and local resources in Datashare
5
5
  Project-URL: Homepage, https://icij.github.io/datashare-python/
6
6
  Project-URL: Documentation, https://icij.github.io/datashare-python/
@@ -1,11 +1,11 @@
1
1
  datashare_python/.gitignore,sha256=e-SRgnvGGdsjRrqgKsTzALz6Obx8IYiOjr0yaAxT6v8,22
2
2
  datashare_python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  datashare_python/__main__.py,sha256=g-fvS46zl9umKmGrSpl-OG-8PSuZgjqvTCqjpsZtSps,101
4
- datashare_python/config.py,sha256=u6iyOeSXzIO30Yja8Vj9LjM-cq8ESRBy3Kse6UadAMg,3763
5
- datashare_python/conftest.py,sha256=BdRLjy9eJtxAKLDCcon1Nyhzn54CIw2z4s3ZOupNYGo,8256
4
+ datashare_python/config.py,sha256=p-uBTle30kbUdqrj8rXcYv2gHiNwtqcYMnGi4Kctumk,3683
5
+ datashare_python/conftest.py,sha256=rQneF4ms-Gsx4NeRrtrU1bH-rjz3eJ6_LoLB9w-eRG8,8306
6
6
  datashare_python/constants.py,sha256=e6Px11OUee9GSHwTgsgFMszGCMwpW-OznHSMgINvepc,338
7
- datashare_python/dependencies.py,sha256=Diu7alKGaFWyC_ajp0fKU-xp8u5f_8x1axAHVBlppD0,3707
8
- datashare_python/discovery.py,sha256=khWTm11NlMVkRyxPV1lJimcTTicuVdxWHOC4H6PfFwE,6128
7
+ datashare_python/dependencies.py,sha256=4UsVFKRjd2Q0ghg_fUU24P26tFYhg_SnAENj2mKErrY,4060
8
+ datashare_python/discovery.py,sha256=EdWBIz5v0uDRG3zJO6h618qW8fB8tzg6NfK0pDZdV08,7546
9
9
  datashare_python/exceptions.py,sha256=bVHEAXxDPKfxeeMC0hJXEsrJkgsKO2ESAhxWU96GA4M,496
10
10
  datashare_python/local_client.py,sha256=GP9MTcHVQ1mcb2eO6TiQ7mzQdx199lZRhK8DRuJqJVQ,2359
11
11
  datashare_python/objects.py,sha256=MbTdBHbNNYeWbc7vrWlu2DcJT0uCSvtrDIiPpfgI_s4,4356
@@ -13,15 +13,15 @@ datashare_python/task_client.py,sha256=oTmP8bvZW0UyhLNMi1AV3XIAx7hrdbxNRss2Mw2az
13
13
  datashare_python/template.py,sha256=RxKTYLXoS_EQ8Jc41JkBXppPdbCFqDWfP3BmC0gvB5o,4024
14
14
  datashare_python/types_.py,sha256=9Hk1XqpdXbM1TnEzwvJ5G9ABbaCZW9KgBTtiPBVn_7k,649
15
15
  datashare_python/utils.py,sha256=DQt-rBwC3Ok72u8VyerG3rqwUTx3ftLfPdMQ5cnRrgs,16801
16
- datashare_python/worker-template.tar.gz,sha256=uarhX-BHryewAFwBZ98_PdA_QUKhApwlXfRYVIULbm0,142242
17
- datashare_python/worker.py,sha256=A4SnmDB4y0ck6Wp_UZWdsSOyTvW54Z2Bq76gxtp-_PE,6070
16
+ datashare_python/worker-template.tar.gz,sha256=NaCH0r1Cxe82mZ4q41YYsv4p3qQL6_siJHdpAd9YNK0,274867
17
+ datashare_python/worker.py,sha256=UdSCWZw8qhkhzok89oU7J35VTDZwxRAqef-5Z8yt95A,6333
18
18
  datashare_python/cli/__init__.py,sha256=5MGSE_0SwlOiwbyPwsP8RIXlTBB2_GGP0zDg4l6UAIY,1479
19
19
  datashare_python/cli/local.py,sha256=S-7qMpSqzi0oMvu01TCFEb8tayEvpw4pXMdCszKEYtU,986
20
20
  datashare_python/cli/project.py,sha256=w32Gy9AOL5B00uDT4in7YUCt2g68FnNbvwg2M3a8G6o,946
21
21
  datashare_python/cli/task.py,sha256=9If5OC7loG4C4gWWl4iOeqPJ4GOLlCWXQfuNLUHORrQ,5860
22
22
  datashare_python/cli/utils.py,sha256=p69CQb0zfixuyBkiZprhdMCc_NuYwXyAn6vC9H1UzAw,911
23
- datashare_python/cli/worker.py,sha256=_LZFtv6O57Ez7XdRKmmwQ0b_5_8uFajPySv_jbzXbjI,5384
24
- datashare_python-0.3.0.dist-info/METADATA,sha256=z7AVC4VL38lRzJZlcLHvrj6DVj1H4lWx35HBlZH0btQ,907
25
- datashare_python-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
26
- datashare_python-0.3.0.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
27
- datashare_python-0.3.0.dist-info/RECORD,,
23
+ datashare_python/cli/worker.py,sha256=ms69-vSgB7HOspH9D5h3-bLhg1mJNFvdIiBCtLtq3wc,5543
24
+ datashare_python-0.4.0.dist-info/METADATA,sha256=mNCY9TciwKu-E9toJPb_B6VBrN8W0jREW_fKl034CHU,907
25
+ datashare_python-0.4.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
26
+ datashare_python-0.4.0.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
27
+ datashare_python-0.4.0.dist-info/RECORD,,