datashare-python 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- datashare_python/cli/worker.py +15 -18
- datashare_python/config.py +0 -4
- datashare_python/conftest.py +5 -5
- datashare_python/dependencies.py +12 -0
- datashare_python/discovery.py +30 -10
- datashare_python/worker-template.tar.gz +0 -0
- datashare_python/worker.py +16 -8
- {datashare_python-0.3.0.dist-info → datashare_python-0.5.0.dist-info}/METADATA +1 -1
- {datashare_python-0.3.0.dist-info → datashare_python-0.5.0.dist-info}/RECORD +11 -11
- {datashare_python-0.3.0.dist-info → datashare_python-0.5.0.dist-info}/WHEEL +0 -0
- {datashare_python-0.3.0.dist-info → datashare_python-0.5.0.dist-info}/entry_points.txt +0 -0
datashare_python/cli/worker.py
CHANGED
|
@@ -7,9 +7,9 @@ import typer
|
|
|
7
7
|
import yaml
|
|
8
8
|
from icij_common.pydantic_utils import safe_copy
|
|
9
9
|
|
|
10
|
-
from datashare_python.config import TemporalClientConfig
|
|
10
|
+
from datashare_python.config import TemporalClientConfig
|
|
11
11
|
from datashare_python.discovery import discover, discover_activities, discover_workflows
|
|
12
|
-
from datashare_python.worker import
|
|
12
|
+
from datashare_python.worker import create_worker_id, worker_context
|
|
13
13
|
|
|
14
14
|
from .utils import AsyncTyper
|
|
15
15
|
|
|
@@ -102,19 +102,22 @@ async def start(
|
|
|
102
102
|
typer.Option("--temporal-namespace", "-ns", help=_TEMPORAL_NAMESPACE_HELP),
|
|
103
103
|
] = None,
|
|
104
104
|
) -> None:
|
|
105
|
+
registered_wfs, registered_acts, registered_deps, worker_config_cls = discover(
|
|
106
|
+
workflows, act_names=activities, deps_name=dependencies
|
|
107
|
+
)
|
|
105
108
|
if config_path is not None:
|
|
106
109
|
with config_path.open() as f:
|
|
107
|
-
|
|
110
|
+
worker_config = worker_config_cls.model_validate(
|
|
108
111
|
yaml.load(f, Loader=yaml.Loader)
|
|
109
112
|
)
|
|
110
113
|
else:
|
|
111
|
-
|
|
114
|
+
worker_config = worker_config_cls()
|
|
112
115
|
worker_id = create_worker_id(worker_id_prefix or "worker")
|
|
113
116
|
logger.info(
|
|
114
117
|
"starting worker %s on queue %s, with config: %s",
|
|
115
118
|
worker_id,
|
|
116
119
|
queue,
|
|
117
|
-
|
|
120
|
+
worker_config.model_dump_json(indent=2),
|
|
118
121
|
)
|
|
119
122
|
temporal_override = dict()
|
|
120
123
|
if temporal_address is not None:
|
|
@@ -124,24 +127,18 @@ async def start(
|
|
|
124
127
|
if temporal_override:
|
|
125
128
|
temporal_config = TemporalClientConfig(**temporal_override)
|
|
126
129
|
update = {"temporal": temporal_config}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
workflows, act_names=activities, deps_name=dependencies
|
|
130
|
-
)
|
|
131
|
-
client = await bootstrap_config.to_temporal_client()
|
|
130
|
+
worker_config = safe_copy(worker_config, update=update)
|
|
131
|
+
client = await worker_config.to_temporal_client()
|
|
132
132
|
event_loop = asyncio.get_event_loop()
|
|
133
|
-
|
|
133
|
+
worker_ctx = worker_context(
|
|
134
134
|
worker_id,
|
|
135
135
|
activities=registered_acts,
|
|
136
136
|
workflows=registered_wfs,
|
|
137
137
|
dependencies=registered_deps,
|
|
138
|
-
|
|
138
|
+
worker_config=worker_config,
|
|
139
139
|
client=client,
|
|
140
140
|
event_loop=event_loop,
|
|
141
141
|
task_queue=queue,
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
except Exception as e: # noqa: BLE001
|
|
146
|
-
await worker.shutdown()
|
|
147
|
-
raise e
|
|
142
|
+
)
|
|
143
|
+
async with worker_ctx as worker:
|
|
144
|
+
await worker.is_done()
|
datashare_python/config.py
CHANGED
|
@@ -74,10 +74,6 @@ class TemporalClientConfig(BaseModel):
|
|
|
74
74
|
)
|
|
75
75
|
return self._client
|
|
76
76
|
|
|
77
|
-
# For the lru_cache
|
|
78
|
-
def __hash__(self) -> int:
|
|
79
|
-
return id(self)
|
|
80
|
-
|
|
81
77
|
|
|
82
78
|
class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
|
|
83
79
|
model_config = DS_WORKER_SETTINGS_CONFIG
|
datashare_python/conftest.py
CHANGED
|
@@ -17,7 +17,6 @@ from datashare_python.config import (
|
|
|
17
17
|
from datashare_python.dependencies import (
|
|
18
18
|
lifespan_es_client,
|
|
19
19
|
lifespan_task_client,
|
|
20
|
-
lifespan_temporal_client,
|
|
21
20
|
set_es_client,
|
|
22
21
|
set_event_loop,
|
|
23
22
|
set_loggers,
|
|
@@ -170,10 +169,11 @@ async def test_task_client(
|
|
|
170
169
|
|
|
171
170
|
|
|
172
171
|
@pytest.fixture(scope="session")
|
|
173
|
-
def test_temporal_client_session(
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
172
|
+
async def test_temporal_client_session(
|
|
173
|
+
test_worker_config: WorkerConfig,
|
|
174
|
+
event_loop: AbstractEventLoop, # noqa: ARG001
|
|
175
|
+
) -> TemporalClient: # noqa: ANN001
|
|
176
|
+
return await test_worker_config.to_temporal_client()
|
|
177
177
|
|
|
178
178
|
|
|
179
179
|
@pytest.fixture
|
datashare_python/dependencies.py
CHANGED
|
@@ -21,6 +21,7 @@ EVENT_LOOP: ContextVar[AbstractEventLoop] = ContextVar("event_loop")
|
|
|
21
21
|
ES_CLIENT: ContextVar[ESClient] = ContextVar("es_client")
|
|
22
22
|
TASK_CLIENT: ContextVar[DatashareTaskClient] = ContextVar("task_client")
|
|
23
23
|
TEMPORAL_CLIENT: ContextVar[TemporalClient] = ContextVar("temporal_client")
|
|
24
|
+
WORKER_CONFIG: ContextVar[WorkerConfig] = ContextVar("worker_config")
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
def set_event_loop(event_loop: AbstractEventLoop) -> None:
|
|
@@ -40,6 +41,17 @@ def set_loggers(worker_config: WorkerConfig, worker_id: str) -> None:
|
|
|
40
41
|
logger.info("app config: %s", worker_config.model_dump_json(indent=2))
|
|
41
42
|
|
|
42
43
|
|
|
44
|
+
def set_worker_config(worker_config: WorkerConfig) -> None:
|
|
45
|
+
WORKER_CONFIG.set(worker_config)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def lifespan_worker_config() -> WorkerConfig:
|
|
49
|
+
try:
|
|
50
|
+
return WORKER_CONFIG.get()
|
|
51
|
+
except LookupError as e:
|
|
52
|
+
raise DependencyInjectionError("worker config") from e
|
|
53
|
+
|
|
54
|
+
|
|
43
55
|
async def set_es_client(worker_config: WorkerConfig) -> ESClient:
|
|
44
56
|
client = worker_config.to_es_client()
|
|
45
57
|
ES_CLIENT.set(client)
|
datashare_python/discovery.py
CHANGED
|
@@ -3,6 +3,8 @@ import re
|
|
|
3
3
|
from collections.abc import Callable, Iterable
|
|
4
4
|
from importlib.metadata import entry_points
|
|
5
5
|
|
|
6
|
+
from .config import WorkerConfig
|
|
7
|
+
from .dependencies import set_worker_config
|
|
6
8
|
from .types_ import ContextManagerFactory
|
|
7
9
|
from .utils import ActivityWithProgress
|
|
8
10
|
|
|
@@ -11,9 +13,11 @@ logger = logging.getLogger(__name__)
|
|
|
11
13
|
Activity = ActivityWithProgress | Callable | type
|
|
12
14
|
|
|
13
15
|
_DEPENDENCIES = "dependencies"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
16
|
+
_WORKER_CONFIG_CLS = "worker_config_cls"
|
|
17
|
+
_WORKFLOW_GROUP = "datashare.workflows"
|
|
18
|
+
_ACTIVITIES_GROUP = "datashare.activities"
|
|
19
|
+
_DEPENDENCIES_GROUP = "datashare.dependencies"
|
|
20
|
+
_WORKER_CONFIG_CLS_GROUP = "datashare.worker_config_cls"
|
|
17
21
|
|
|
18
22
|
_RegisteredWorkflow = tuple[str, type]
|
|
19
23
|
_RegisteredActivity = tuple[str, Activity]
|
|
@@ -22,6 +26,7 @@ _Discovery = tuple[
|
|
|
22
26
|
Iterable[_RegisteredWorkflow] | None,
|
|
23
27
|
Iterable[_RegisteredActivity] | None,
|
|
24
28
|
_Dependencies | None,
|
|
29
|
+
type[WorkerConfig],
|
|
25
30
|
]
|
|
26
31
|
|
|
27
32
|
|
|
@@ -58,11 +63,13 @@ def discover(
|
|
|
58
63
|
if wf_names:
|
|
59
64
|
msg += "workflow patterns " + ", ".join(wf_names) + " "
|
|
60
65
|
if act_names:
|
|
61
|
-
msg
|
|
66
|
+
msg += "activity patterns " + ", ".join(act_names)
|
|
62
67
|
raise ValueError(msg)
|
|
63
|
-
deps =
|
|
68
|
+
deps = []
|
|
64
69
|
if deps_name is not None:
|
|
65
70
|
deps = discover_dependencies(deps_name)
|
|
71
|
+
if set_worker_config not in deps:
|
|
72
|
+
deps.append(set_worker_config)
|
|
66
73
|
if deps:
|
|
67
74
|
n_deps = len(deps)
|
|
68
75
|
discovered += "\n"
|
|
@@ -71,13 +78,15 @@ def discover(
|
|
|
71
78
|
f"- {n_deps} dependenc{'ies' if n_deps > 1 else 'y'}:"
|
|
72
79
|
f" {', '.join(deps_names)}"
|
|
73
80
|
)
|
|
81
|
+
worker_config_cls = discover_worker_config_cls()
|
|
82
|
+
discovered += f"- worker config class: {worker_config_cls}"
|
|
74
83
|
logger.info("discovered:\n%s", discovered)
|
|
75
|
-
return wfs, acts, deps
|
|
84
|
+
return wfs, acts, deps, worker_config_cls
|
|
76
85
|
|
|
77
86
|
|
|
78
87
|
def discover_workflows(names: list[str]) -> list[_RegisteredWorkflow]:
|
|
79
88
|
pattern = None if not names else re.compile(rf"^{'|'.join(names)}$")
|
|
80
|
-
impls = entry_points(group=
|
|
89
|
+
impls = entry_points(group=_WORKFLOW_GROUP)
|
|
81
90
|
registered = []
|
|
82
91
|
for wf_impls in impls:
|
|
83
92
|
wf_impls = wf_impls.load() # noqa: PLW2901
|
|
@@ -93,7 +102,7 @@ def discover_workflows(names: list[str]) -> list[_RegisteredWorkflow]:
|
|
|
93
102
|
|
|
94
103
|
def discover_activities(names: list[str]) -> list[_RegisteredActivity]:
|
|
95
104
|
pattern = None if not names else re.compile(rf"^{'|'.join(names)}$")
|
|
96
|
-
impls = entry_points(group=
|
|
105
|
+
impls = entry_points(group=_ACTIVITIES_GROUP)
|
|
97
106
|
registered = []
|
|
98
107
|
for act_impls in impls:
|
|
99
108
|
act_impls = act_impls.load() # noqa: PLW2901
|
|
@@ -108,9 +117,9 @@ def discover_activities(names: list[str]) -> list[_RegisteredActivity]:
|
|
|
108
117
|
|
|
109
118
|
|
|
110
119
|
def discover_dependencies(name: str) -> _Dependencies:
|
|
111
|
-
impls = entry_points(name=_DEPENDENCIES, group=
|
|
120
|
+
impls = entry_points(name=_DEPENDENCIES, group=_DEPENDENCIES_GROUP)
|
|
112
121
|
if not impls:
|
|
113
|
-
available_impls = entry_points(group=
|
|
122
|
+
available_impls = entry_points(group=_DEPENDENCIES_GROUP)
|
|
114
123
|
msg = (
|
|
115
124
|
f'failed to find dependency: "{name}", '
|
|
116
125
|
f"available dependencies: {available_impls}"
|
|
@@ -131,6 +140,17 @@ def discover_dependencies(name: str) -> _Dependencies:
|
|
|
131
140
|
raise LookupError(msg) from e
|
|
132
141
|
|
|
133
142
|
|
|
143
|
+
def discover_worker_config_cls() -> type[WorkerConfig]:
|
|
144
|
+
impls = entry_points(name=_WORKER_CONFIG_CLS, group=_WORKER_CONFIG_CLS_GROUP)
|
|
145
|
+
if not impls:
|
|
146
|
+
return WorkerConfig
|
|
147
|
+
if len(impls) > 1:
|
|
148
|
+
msg = f'found multiple registered worker configs classes": {impls}'
|
|
149
|
+
raise ValueError(msg)
|
|
150
|
+
deps_registry = impls[_WORKER_CONFIG_CLS].load()
|
|
151
|
+
return deps_registry
|
|
152
|
+
|
|
153
|
+
|
|
134
154
|
def _parse_wf_name(wf_type: type) -> str:
|
|
135
155
|
if not isinstance(wf_type, type):
|
|
136
156
|
msg = (
|
|
Binary file
|
datashare_python/worker.py
CHANGED
|
@@ -43,6 +43,13 @@ documentation for more details:
|
|
|
43
43
|
_ACTIVITY_THREAD_NAME_PREFIX = "datashare-activity-worker-"
|
|
44
44
|
|
|
45
45
|
|
|
46
|
+
class DatashareWorker(Worker):
|
|
47
|
+
async def is_done(self) -> None:
|
|
48
|
+
if self._async_context_run_task is None:
|
|
49
|
+
raise ValueError("worker is not running")
|
|
50
|
+
await self._async_context_run_task
|
|
51
|
+
|
|
52
|
+
|
|
46
53
|
def datashare_worker(
|
|
47
54
|
client: TemporalClient,
|
|
48
55
|
worker_id: str,
|
|
@@ -53,7 +60,7 @@ def datashare_worker(
|
|
|
53
60
|
# Scale horizontally by default for activities, each worker processes one activity
|
|
54
61
|
# at a time
|
|
55
62
|
max_concurrent_io_activities: int = 10,
|
|
56
|
-
) ->
|
|
63
|
+
) -> DatashareWorker:
|
|
57
64
|
if workflows is None:
|
|
58
65
|
workflows = []
|
|
59
66
|
if activities is None:
|
|
@@ -77,7 +84,7 @@ def datashare_worker(
|
|
|
77
84
|
if workflows:
|
|
78
85
|
logger.warning(_SEPARATE_IO_AND_CPU_WORKERS)
|
|
79
86
|
|
|
80
|
-
return
|
|
87
|
+
return DatashareWorker(
|
|
81
88
|
client,
|
|
82
89
|
identity=worker_id,
|
|
83
90
|
workflows=workflows,
|
|
@@ -124,21 +131,21 @@ def init_activity(
|
|
|
124
131
|
|
|
125
132
|
|
|
126
133
|
@asynccontextmanager
|
|
127
|
-
async def
|
|
134
|
+
async def worker_context(
|
|
128
135
|
worker_id: str,
|
|
129
136
|
*,
|
|
130
137
|
activities: list[Callable[..., Any] | None] | None = None,
|
|
131
138
|
workflows: list[type] | None = None,
|
|
132
|
-
|
|
139
|
+
worker_config: WorkerConfig,
|
|
133
140
|
client: TemporalClient,
|
|
134
141
|
event_loop: AbstractEventLoop,
|
|
135
142
|
task_queue: str,
|
|
136
143
|
dependencies: list[ContextManagerFactory] | None = None,
|
|
137
|
-
) -> AsyncGenerator[
|
|
144
|
+
) -> AsyncGenerator[DatashareWorker, None]:
|
|
138
145
|
deps_cm = (
|
|
139
146
|
with_dependencies(
|
|
140
147
|
dependencies,
|
|
141
|
-
worker_config=
|
|
148
|
+
worker_config=worker_config,
|
|
142
149
|
worker_id=worker_id,
|
|
143
150
|
event_loop=event_loop,
|
|
144
151
|
)
|
|
@@ -159,9 +166,10 @@ async def bootstrap_worker(
|
|
|
159
166
|
workflows=workflows,
|
|
160
167
|
activities=acts,
|
|
161
168
|
task_queue=task_queue,
|
|
162
|
-
max_concurrent_io_activities=
|
|
169
|
+
max_concurrent_io_activities=worker_config.max_concurrent_io_activities,
|
|
163
170
|
)
|
|
164
|
-
|
|
171
|
+
async with worker:
|
|
172
|
+
yield worker
|
|
165
173
|
|
|
166
174
|
|
|
167
175
|
@asynccontextmanager
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datashare-python
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Manage Python tasks and local resources in Datashare
|
|
5
5
|
Project-URL: Homepage, https://icij.github.io/datashare-python/
|
|
6
6
|
Project-URL: Documentation, https://icij.github.io/datashare-python/
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
datashare_python/.gitignore,sha256=e-SRgnvGGdsjRrqgKsTzALz6Obx8IYiOjr0yaAxT6v8,22
|
|
2
2
|
datashare_python/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
datashare_python/__main__.py,sha256=g-fvS46zl9umKmGrSpl-OG-8PSuZgjqvTCqjpsZtSps,101
|
|
4
|
-
datashare_python/config.py,sha256=
|
|
5
|
-
datashare_python/conftest.py,sha256=
|
|
4
|
+
datashare_python/config.py,sha256=p-uBTle30kbUdqrj8rXcYv2gHiNwtqcYMnGi4Kctumk,3683
|
|
5
|
+
datashare_python/conftest.py,sha256=rQneF4ms-Gsx4NeRrtrU1bH-rjz3eJ6_LoLB9w-eRG8,8306
|
|
6
6
|
datashare_python/constants.py,sha256=e6Px11OUee9GSHwTgsgFMszGCMwpW-OznHSMgINvepc,338
|
|
7
|
-
datashare_python/dependencies.py,sha256=
|
|
8
|
-
datashare_python/discovery.py,sha256=
|
|
7
|
+
datashare_python/dependencies.py,sha256=4UsVFKRjd2Q0ghg_fUU24P26tFYhg_SnAENj2mKErrY,4060
|
|
8
|
+
datashare_python/discovery.py,sha256=UsfIb_pL56BQ5i5xvfuhOO0bDefjA_oToxrCeJQQbcU,6925
|
|
9
9
|
datashare_python/exceptions.py,sha256=bVHEAXxDPKfxeeMC0hJXEsrJkgsKO2ESAhxWU96GA4M,496
|
|
10
10
|
datashare_python/local_client.py,sha256=GP9MTcHVQ1mcb2eO6TiQ7mzQdx199lZRhK8DRuJqJVQ,2359
|
|
11
11
|
datashare_python/objects.py,sha256=MbTdBHbNNYeWbc7vrWlu2DcJT0uCSvtrDIiPpfgI_s4,4356
|
|
@@ -13,15 +13,15 @@ datashare_python/task_client.py,sha256=oTmP8bvZW0UyhLNMi1AV3XIAx7hrdbxNRss2Mw2az
|
|
|
13
13
|
datashare_python/template.py,sha256=RxKTYLXoS_EQ8Jc41JkBXppPdbCFqDWfP3BmC0gvB5o,4024
|
|
14
14
|
datashare_python/types_.py,sha256=9Hk1XqpdXbM1TnEzwvJ5G9ABbaCZW9KgBTtiPBVn_7k,649
|
|
15
15
|
datashare_python/utils.py,sha256=DQt-rBwC3Ok72u8VyerG3rqwUTx3ftLfPdMQ5cnRrgs,16801
|
|
16
|
-
datashare_python/worker-template.tar.gz,sha256=
|
|
17
|
-
datashare_python/worker.py,sha256=
|
|
16
|
+
datashare_python/worker-template.tar.gz,sha256=euuWPSjRpxI5-b1SQa8wJNQ7dfIMqUfdacpRrnKC_mk,274849
|
|
17
|
+
datashare_python/worker.py,sha256=UdSCWZw8qhkhzok89oU7J35VTDZwxRAqef-5Z8yt95A,6333
|
|
18
18
|
datashare_python/cli/__init__.py,sha256=5MGSE_0SwlOiwbyPwsP8RIXlTBB2_GGP0zDg4l6UAIY,1479
|
|
19
19
|
datashare_python/cli/local.py,sha256=S-7qMpSqzi0oMvu01TCFEb8tayEvpw4pXMdCszKEYtU,986
|
|
20
20
|
datashare_python/cli/project.py,sha256=w32Gy9AOL5B00uDT4in7YUCt2g68FnNbvwg2M3a8G6o,946
|
|
21
21
|
datashare_python/cli/task.py,sha256=9If5OC7loG4C4gWWl4iOeqPJ4GOLlCWXQfuNLUHORrQ,5860
|
|
22
22
|
datashare_python/cli/utils.py,sha256=p69CQb0zfixuyBkiZprhdMCc_NuYwXyAn6vC9H1UzAw,911
|
|
23
|
-
datashare_python/cli/worker.py,sha256=
|
|
24
|
-
datashare_python-0.
|
|
25
|
-
datashare_python-0.
|
|
26
|
-
datashare_python-0.
|
|
27
|
-
datashare_python-0.
|
|
23
|
+
datashare_python/cli/worker.py,sha256=I4KTpFIpXFowioFn72Rm6LBCYlY-Dhp4NBIPvtRgUXE,5283
|
|
24
|
+
datashare_python-0.5.0.dist-info/METADATA,sha256=Fg8np6ksHvkPwEn-3MhlfJFVLxptcH5EgluHmauAhAc,907
|
|
25
|
+
datashare_python-0.5.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
26
|
+
datashare_python-0.5.0.dist-info/entry_points.txt,sha256=ILE7auxabHWiu3GC-AunWnzjhOI_SbZp7D4GqZHlLw4,68
|
|
27
|
+
datashare_python-0.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|