datashare-python 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,9 +7,9 @@ import typer
7
7
  import yaml
8
8
  from icij_common.pydantic_utils import safe_copy
9
9
 
10
- from datashare_python.config import TemporalClientConfig, WorkerConfig
10
+ from datashare_python.config import TemporalClientConfig
11
11
  from datashare_python.discovery import discover, discover_activities, discover_workflows
12
- from datashare_python.worker import bootstrap_worker, create_worker_id
12
+ from datashare_python.worker import create_worker_id, worker_context
13
13
 
14
14
  from .utils import AsyncTyper
15
15
 
@@ -102,19 +102,22 @@ async def start(
102
102
  typer.Option("--temporal-namespace", "-ns", help=_TEMPORAL_NAMESPACE_HELP),
103
103
  ] = None,
104
104
  ) -> None:
105
+ registered_wfs, registered_acts, registered_deps, worker_config_cls = discover(
106
+ workflows, act_names=activities, deps_name=dependencies
107
+ )
105
108
  if config_path is not None:
106
109
  with config_path.open() as f:
107
- bootstrap_config = WorkerConfig.model_validate(
110
+ worker_config = worker_config_cls.model_validate(
108
111
  yaml.load(f, Loader=yaml.Loader)
109
112
  )
110
113
  else:
111
- bootstrap_config = WorkerConfig()
114
+ worker_config = worker_config_cls()
112
115
  worker_id = create_worker_id(worker_id_prefix or "worker")
113
116
  logger.info(
114
117
  "starting worker %s on queue %s, with config: %s",
115
118
  worker_id,
116
119
  queue,
117
- bootstrap_config.model_dump_json(indent=2),
120
+ worker_config.model_dump_json(indent=2),
118
121
  )
119
122
  temporal_override = dict()
120
123
  if temporal_address is not None:
@@ -124,24 +127,18 @@ async def start(
124
127
  if temporal_override:
125
128
  temporal_config = TemporalClientConfig(**temporal_override)
126
129
  update = {"temporal": temporal_config}
127
- bootstrap_config = safe_copy(bootstrap_config, update=update)
128
- registered_wfs, registered_acts, registered_deps = discover(
129
- workflows, act_names=activities, deps_name=dependencies
130
- )
131
- client = await bootstrap_config.to_temporal_client()
130
+ worker_config = safe_copy(worker_config, update=update)
131
+ client = await worker_config.to_temporal_client()
132
132
  event_loop = asyncio.get_event_loop()
133
- async with bootstrap_worker(
133
+ worker_ctx = worker_context(
134
134
  worker_id,
135
135
  activities=registered_acts,
136
136
  workflows=registered_wfs,
137
137
  dependencies=registered_deps,
138
- bootstrap_config=bootstrap_config,
138
+ worker_config=worker_config,
139
139
  client=client,
140
140
  event_loop=event_loop,
141
141
  task_queue=queue,
142
- ) as worker:
143
- try:
144
- await worker.run()
145
- except Exception as e: # noqa: BLE001
146
- await worker.shutdown()
147
- raise e
142
+ )
143
+ async with worker_ctx as worker:
144
+ await worker.is_done()
@@ -74,10 +74,6 @@ class TemporalClientConfig(BaseModel):
74
74
  )
75
75
  return self._client
76
76
 
77
- # For the lru_cache
78
- def __hash__(self) -> int:
79
- return id(self)
80
-
81
77
 
82
78
  class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
83
79
  model_config = DS_WORKER_SETTINGS_CONFIG
@@ -17,7 +17,6 @@ from datashare_python.config import (
17
17
  from datashare_python.dependencies import (
18
18
  lifespan_es_client,
19
19
  lifespan_task_client,
20
- lifespan_temporal_client,
21
20
  set_es_client,
22
21
  set_event_loop,
23
22
  set_loggers,
@@ -170,10 +169,11 @@ async def test_task_client(
170
169
 
171
170
 
172
171
  @pytest.fixture(scope="session")
173
- def test_temporal_client_session(
174
- worker_lifetime_deps, # noqa: ANN001, ARG001
175
- ) -> TemporalClient:
176
- return lifespan_temporal_client()
172
+ async def test_temporal_client_session(
173
+ test_worker_config: WorkerConfig,
174
+ event_loop: AbstractEventLoop, # noqa: ARG001
175
+ ) -> TemporalClient: # noqa: ANN001
176
+ return await test_worker_config.to_temporal_client()
177
177
 
178
178
 
179
179
  @pytest.fixture
@@ -21,6 +21,7 @@ EVENT_LOOP: ContextVar[AbstractEventLoop] = ContextVar("event_loop")
21
21
  ES_CLIENT: ContextVar[ESClient] = ContextVar("es_client")
22
22
  TASK_CLIENT: ContextVar[DatashareTaskClient] = ContextVar("task_client")
23
23
  TEMPORAL_CLIENT: ContextVar[TemporalClient] = ContextVar("temporal_client")
24
+ WORKER_CONFIG: ContextVar[WorkerConfig] = ContextVar("worker_config")
24
25
 
25
26
 
26
27
  def set_event_loop(event_loop: AbstractEventLoop) -> None:
@@ -40,6 +41,17 @@ def set_loggers(worker_config: WorkerConfig, worker_id: str) -> None:
40
41
  logger.info("app config: %s", worker_config.model_dump_json(indent=2))
41
42
 
42
43
 
44
+ def set_worker_config(worker_config: WorkerConfig) -> None:
45
+ WORKER_CONFIG.set(worker_config)
46
+
47
+
48
+ def lifespan_worker_config() -> WorkerConfig:
49
+ try:
50
+ return WORKER_CONFIG.get()
51
+ except LookupError as e:
52
+ raise DependencyInjectionError("worker config") from e
53
+
54
+
43
55
  async def set_es_client(worker_config: WorkerConfig) -> ESClient:
44
56
  client = worker_config.to_es_client()
45
57
  ES_CLIENT.set(client)
@@ -3,6 +3,8 @@ import re
3
3
  from collections.abc import Callable, Iterable
4
4
  from importlib.metadata import entry_points
5
5
 
6
+ from .config import WorkerConfig
7
+ from .dependencies import set_worker_config
6
8
  from .types_ import ContextManagerFactory
7
9
  from .utils import ActivityWithProgress
8
10
 
@@ -11,9 +13,11 @@ logger = logging.getLogger(__name__)
11
13
  Activity = ActivityWithProgress | Callable | type
12
14
 
13
15
  _DEPENDENCIES = "dependencies"
14
- _WORKFLOW_GROUPS = "datashare.workflows"
15
- _ACTIVITIES_GROUPS = "datashare.activities"
16
- _DEPENDENCIES_GROUPS = "datashare.dependencies"
16
+ _WORKER_CONFIG_CLS = "worker_config_cls"
17
+ _WORKFLOW_GROUP = "datashare.workflows"
18
+ _ACTIVITIES_GROUP = "datashare.activities"
19
+ _DEPENDENCIES_GROUP = "datashare.dependencies"
20
+ _WORKER_CONFIG_CLS_GROUP = "datashare.worker_config_cls"
17
21
 
18
22
  _RegisteredWorkflow = tuple[str, type]
19
23
  _RegisteredActivity = tuple[str, Activity]
@@ -22,6 +26,7 @@ _Discovery = tuple[
22
26
  Iterable[_RegisteredWorkflow] | None,
23
27
  Iterable[_RegisteredActivity] | None,
24
28
  _Dependencies | None,
29
+ type[WorkerConfig],
25
30
  ]
26
31
 
27
32
 
@@ -58,11 +63,13 @@ def discover(
58
63
  if wf_names:
59
64
  msg += "workflow patterns " + ", ".join(wf_names) + " "
60
65
  if act_names:
61
- msg = "activity patterns " + ", ".join(act_names)
66
+ msg += "activity patterns " + ", ".join(act_names)
62
67
  raise ValueError(msg)
63
- deps = None
68
+ deps = []
64
69
  if deps_name is not None:
65
70
  deps = discover_dependencies(deps_name)
71
+ if set_worker_config not in deps:
72
+ deps.append(set_worker_config)
66
73
  if deps:
67
74
  n_deps = len(deps)
68
75
  discovered += "\n"
@@ -71,13 +78,15 @@ def discover(
71
78
  f"- {n_deps} dependenc{'ies' if n_deps > 1 else 'y'}:"
72
79
  f" {', '.join(deps_names)}"
73
80
  )
81
+ worker_config_cls = discover_worker_config_cls()
82
+ discovered += f"- worker config class: {worker_config_cls}"
74
83
  logger.info("discovered:\n%s", discovered)
75
- return wfs, acts, deps
84
+ return wfs, acts, deps, worker_config_cls
76
85
 
77
86
 
78
87
  def discover_workflows(names: list[str]) -> list[_RegisteredWorkflow]:
79
88
  pattern = None if not names else re.compile(rf"^{'|'.join(names)}$")
80
- impls = entry_points(group=_WORKFLOW_GROUPS)
89
+ impls = entry_points(group=_WORKFLOW_GROUP)
81
90
  registered = []
82
91
  for wf_impls in impls:
83
92
  wf_impls = wf_impls.load() # noqa: PLW2901
@@ -93,7 +102,7 @@ def discover_workflows(names: list[str]) -> list[_RegisteredWorkflow]:
93
102
 
94
103
  def discover_activities(names: list[str]) -> list[_RegisteredActivity]:
95
104
  pattern = None if not names else re.compile(rf"^{'|'.join(names)}$")
96
- impls = entry_points(group=_ACTIVITIES_GROUPS)
105
+ impls = entry_points(group=_ACTIVITIES_GROUP)
97
106
  registered = []
98
107
  for act_impls in impls:
99
108
  act_impls = act_impls.load() # noqa: PLW2901
@@ -108,9 +117,9 @@ def discover_activities(names: list[str]) -> list[_RegisteredActivity]:
108
117
 
109
118
 
110
119
  def discover_dependencies(name: str) -> _Dependencies:
111
- impls = entry_points(name=_DEPENDENCIES, group=_DEPENDENCIES_GROUPS)
120
+ impls = entry_points(name=_DEPENDENCIES, group=_DEPENDENCIES_GROUP)
112
121
  if not impls:
113
- available_impls = entry_points(group=_DEPENDENCIES_GROUPS)
122
+ available_impls = entry_points(group=_DEPENDENCIES_GROUP)
114
123
  msg = (
115
124
  f'failed to find dependency: "{name}", '
116
125
  f"available dependencies: {available_impls}"
@@ -131,6 +140,17 @@ def discover_dependencies(name: str) -> _Dependencies:
131
140
  raise LookupError(msg) from e
132
141
 
133
142
 
143
+ def discover_worker_config_cls() -> type[WorkerConfig]:
144
+ impls = entry_points(name=_WORKER_CONFIG_CLS, group=_WORKER_CONFIG_CLS_GROUP)
145
+ if not impls:
146
+ return WorkerConfig
147
+ if len(impls) > 1:
148
+ msg = f'found multiple registered worker configs classes": {impls}'
149
+ raise ValueError(msg)
150
+ deps_registry = impls[_WORKER_CONFIG_CLS].load()
151
+ return deps_registry
152
+
153
+
134
154
  def _parse_wf_name(wf_type: type) -> str:
135
155
  if not isinstance(wf_type, type):
136
156
  msg = (
Binary file
@@ -43,6 +43,13 @@ documentation for more details:
43
43
  _ACTIVITY_THREAD_NAME_PREFIX = "datashare-activity-worker-"
44
44
 
45
45
 
46
+ class DatashareWorker(Worker):
47
+ async def is_done(self) -> None:
48
+ if self._async_context_run_task is None:
49
+ raise ValueError("worker is not running")
50
+ await self._async_context_run_task
51
+
52
+
46
53
  def datashare_worker(
47
54
  client: TemporalClient,
48
55
  worker_id: str,
@@ -53,7 +60,7 @@ def datashare_worker(
53
60
  # Scale horizontally by default for activities, each worker processes one activity
54
61
  # at a time
55
62
  max_concurrent_io_activities: int = 10,
56
- ) -> Worker:
63
+ ) -> DatashareWorker:
57
64
  if workflows is None:
58
65
  workflows = []
59
66
  if activities is None:
@@ -77,7 +84,7 @@ def datashare_worker(
77
84
  if workflows:
78
85
  logger.warning(_SEPARATE_IO_AND_CPU_WORKERS)
79
86
 
80
- return Worker(
87
+ return DatashareWorker(
81
88
  client,
82
89
  identity=worker_id,
83
90
  workflows=workflows,
@@ -124,21 +131,21 @@ def init_activity(
124
131
 
125
132
 
126
133
  @asynccontextmanager
127
- async def bootstrap_worker(
134
+ async def worker_context(
128
135
  worker_id: str,
129
136
  *,
130
137
  activities: list[Callable[..., Any] | None] | None = None,
131
138
  workflows: list[type] | None = None,
132
- bootstrap_config: WorkerConfig,
139
+ worker_config: WorkerConfig,
133
140
  client: TemporalClient,
134
141
  event_loop: AbstractEventLoop,
135
142
  task_queue: str,
136
143
  dependencies: list[ContextManagerFactory] | None = None,
137
- ) -> AsyncGenerator[Worker, None]:
144
+ ) -> AsyncGenerator[DatashareWorker, None]:
138
145
  deps_cm = (
139
146
  with_dependencies(
140
147
  dependencies,
141
- worker_config=bootstrap_config,
148
+ worker_config=worker_config,
142
149
  worker_id=worker_id,
143
150
  event_loop=event_loop,
144
151
  )
@@ -159,9 +166,10 @@ async def bootstrap_worker(
159
166
  workflows=workflows,
160
167
  activities=acts,
161
168
  task_queue=task_queue,
162
- max_concurrent_io_activities=bootstrap_config.max_concurrent_io_activities,
169
+ max_concurrent_io_activities=worker_config.max_concurrent_io_activities,
163
170
  )
164
- yield worker
171
+ async with worker:
172
+ yield worker
165
173
 
166
174
 
167
175
  @asynccontextmanager
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datashare-python
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: Manage Python tasks and local resources in Datashare
5
5
  Project-URL: Homepage, https://icij.github.io/datashare-python/
6
6
  Project-URL: Documentation, https://icij.github.io/datashare-python/
@@ -1,11 +1,11 @@
1
1
  datashare_python/.gitignore,sha256=e-SRgnvGGdsjRrqgKsTzALz6Obx8IYiOjr0yaAxT6v8,22
2
2
  datashare_python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  datashare_python/__main__.py,sha256=g-fvS46zl9umKmGrSpl-OG-8PSuZgjqvTCqjpsZtSps,101
4
- datashare_python/config.py,sha256=u6iyOeSXzIO30Yja8Vj9LjM-cq8ESRBy3Kse6UadAMg,3763
5
- datashare_python/conftest.py,sha256=BdRLjy9eJtxAKLDCcon1Nyhzn54CIw2z4s3ZOupNYGo,8256
4
+ datashare_python/config.py,sha256=p-uBTle30kbUdqrj8rXcYv2gHiNwtqcYMnGi4Kctumk,3683
5
+ datashare_python/conftest.py,sha256=rQneF4ms-Gsx4NeRrtrU1bH-rjz3eJ6_LoLB9w-eRG8,8306
6
6
  datashare_python/constants.py,sha256=e6Px11OUee9GSHwTgsgFMszGCMwpW-OznHSMgINvepc,338
7
- datashare_python/dependencies.py,sha256=Diu7alKGaFWyC_ajp0fKU-xp8u5f_8x1axAHVBlppD0,3707
8
- datashare_python/discovery.py,sha256=khWTm11NlMVkRyxPV1lJimcTTicuVdxWHOC4H6PfFwE,6128
7
+ datashare_python/dependencies.py,sha256=4UsVFKRjd2Q0ghg_fUU24P26tFYhg_SnAENj2mKErrY,4060
8
+ datashare_python/discovery.py,sha256=UsfIb_pL56BQ5i5xvfuhOO0bDefjA_oToxrCeJQQbcU,6925
9
9
  datashare_python/exceptions.py,sha256=bVHEAXxDPKfxeeMC0hJXEsrJkgsKO2ESAhxWU96GA4M,496
10
10
  datashare_python/local_client.py,sha256=GP9MTcHVQ1mcb2eO6TiQ7mzQdx199lZRhK8DRuJqJVQ,2359
11
11
  datashare_python/objects.py,sha256=MbTdBHbNNYeWbc7vrWlu2DcJT0uCSvtrDIiPpfgI_s4,4356
@@ -13,15 +13,15 @@ datashare_python/task_client.py,sha256=oTmP8bvZW0UyhLNMi1AV3XIAx7hrdbxNRss2Mw2az
13
13
  datashare_python/template.py,sha256=RxKTYLXoS_EQ8Jc41JkBXppPdbCFqDWfP3BmC0gvB5o,4024
14
14
  datashare_python/types_.py,sha256=9Hk1XqpdXbM1TnEzwvJ5G9ABbaCZW9KgBTtiPBVn_7k,649
15
15
  datashare_python/utils.py,sha256=DQt-rBwC3Ok72u8VyerG3rqwUTx3ftLfPdMQ5cnRrgs,16801
16
- datashare_python/worker-template.tar.gz,sha256=uarhX-BHryewAFwBZ98_PdA_QUKhApwlXfRYVIULbm0,142242
17
- datashare_python/worker.py,sha256=A4SnmDB4y0ck6Wp_UZWdsSOyTvW54Z2Bq76gxtp-_PE,6070
16
+ datashare_python/worker-template.tar.gz,sha256=euuWPSjRpxI5-b1SQa8wJNQ7dfIMqUfdacpRrnKC_mk,274849
17
+ datashare_python/worker.py,sha256=UdSCWZw8qhkhzok89oU7J35VTDZwxRAqef-5Z8yt95A,6333
18
18
  datashare_python/cli/__init__.py,sha256=5MGSE_0SwlOiwbyPwsP8RIXlTBB2_GGP0zDg4l6UAIY,1479
19
19
  datashare_python/cli/local.py,sha256=S-7qMpSqzi0oMvu01TCFEb8tayEvpw4pXMdCszKEYtU,986
20
20
  datashare_python/cli/project.py,sha256=w32Gy9AOL5B00uDT4in7YUCt2g68FnNbvwg2M3a8G6o,946
21
21
  datashare_python/cli/task.py,sha256=9If5OC7loG4C4gWWl4iOeqPJ4GOLlCWXQfuNLUHORrQ,5860
22
22
  datashare_python/cli/utils.py,sha256=p69CQb0zfixuyBkiZprhdMCc_NuYwXyAn6vC9H1UzAw,911
23
- datashare_python/cli/worker.py,sha256=_LZFtv6O57Ez7XdRKmmwQ0b_5_8uFajPySv_jbzXbjI,5384
24
- datashare_python-0.3.0.dist-info/METADATA,sha256=z7AVC4VL38lRzJZlcLHvrj6DVj1H4lWx35HBlZH0btQ,907
25
- datashare_python-0.3.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
26
- datashare_python-0.3.0.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
27
- datashare_python-0.3.0.dist-info/RECORD,,
23
+ datashare_python/cli/worker.py,sha256=I4KTpFIpXFowioFn72Rm6LBCYlY-Dhp4NBIPvtRgUXE,5283
24
+ datashare_python-0.5.0.dist-info/METADATA,sha256=Fg8np6ksHvkPwEn-3MhlfJFVLxptcH5EgluHmauAhAc,907
25
+ datashare_python-0.5.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
26
+ datashare_python-0.5.0.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
27
+ datashare_python-0.5.0.dist-info/RECORD,,