datashare-python 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datashare_python-0.3.0 → datashare_python-0.4.0}/PKG-INFO +1 -1
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/cli/worker.py +22 -18
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/config.py +0 -4
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/conftest.py +5 -5
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/dependencies.py +12 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/discovery.py +51 -11
- datashare_python-0.4.0/datashare_python/worker-template.tar.gz +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/worker.py +16 -8
- {datashare_python-0.3.0 → datashare_python-0.4.0}/pyproject.toml +1 -1
- datashare_python-0.3.0/datashare_python/worker-template.tar.gz +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/.gitignore +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/README.md +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/.gitignore +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/__init__.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/__main__.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/cli/__init__.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/cli/local.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/cli/project.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/cli/task.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/cli/utils.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/constants.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/exceptions.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/local_client.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/objects.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/task_client.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/template.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/types_.py +0 -0
- {datashare_python-0.3.0 → datashare_python-0.4.0}/datashare_python/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datashare-python
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Manage Python tasks and local resources in Datashare
|
|
5
5
|
Project-URL: Homepage, https://icij.github.io/datashare-python/
|
|
6
6
|
Project-URL: Documentation, https://icij.github.io/datashare-python/
|
|
@@ -7,9 +7,9 @@ import typer
|
|
|
7
7
|
import yaml
|
|
8
8
|
from icij_common.pydantic_utils import safe_copy
|
|
9
9
|
|
|
10
|
-
from datashare_python.config import TemporalClientConfig
|
|
10
|
+
from datashare_python.config import TemporalClientConfig
|
|
11
11
|
from datashare_python.discovery import discover, discover_activities, discover_workflows
|
|
12
|
-
from datashare_python.worker import
|
|
12
|
+
from datashare_python.worker import create_worker_id, worker_context
|
|
13
13
|
|
|
14
14
|
from .utils import AsyncTyper
|
|
15
15
|
|
|
@@ -24,6 +24,7 @@ _LIST_ACTIVITY_NAMES_HELP = "activity names filters (supports regexes)"
|
|
|
24
24
|
_START_WORKER_WORKFLOWS_HELP = "workflow names run by the worker (supports regexes)"
|
|
25
25
|
_START_WORKER_ACTIVITIES_HELP = "activity names run by the worker (supports regexes)"
|
|
26
26
|
_START_WORKER_DEPS_HELP = "worker lifetime dependencies name in the registry"
|
|
27
|
+
_START_WORKER_CONFIG_HELP = "worker config class key the worker configs registry"
|
|
27
28
|
_START_WORKER_WORKER_ID_PREFIX_HELP = "worker ID prefix"
|
|
28
29
|
_START_WORKER_CONFIG_PATH_HELP = (
|
|
29
30
|
"path to a worker config YAML file,"
|
|
@@ -85,6 +86,9 @@ async def start(
|
|
|
85
86
|
dependencies: Annotated[
|
|
86
87
|
str | None, typer.Option(help=_START_WORKER_DEPS_HELP)
|
|
87
88
|
] = None,
|
|
89
|
+
worker_config_name: Annotated[
|
|
90
|
+
str | None, typer.Option(help=_START_WORKER_CONFIG_HELP)
|
|
91
|
+
] = None,
|
|
88
92
|
config_path: Annotated[
|
|
89
93
|
Path | None,
|
|
90
94
|
typer.Option(
|
|
@@ -102,19 +106,25 @@ async def start(
|
|
|
102
106
|
typer.Option("--temporal-namespace", "-ns", help=_TEMPORAL_NAMESPACE_HELP),
|
|
103
107
|
] = None,
|
|
104
108
|
) -> None:
|
|
109
|
+
registered_wfs, registered_acts, registered_deps, worker_config_cls = discover(
|
|
110
|
+
workflows,
|
|
111
|
+
act_names=activities,
|
|
112
|
+
deps_name=dependencies,
|
|
113
|
+
worker_config_name=worker_config_name,
|
|
114
|
+
)
|
|
105
115
|
if config_path is not None:
|
|
106
116
|
with config_path.open() as f:
|
|
107
|
-
|
|
117
|
+
worker_config = worker_config_cls.model_validate(
|
|
108
118
|
yaml.load(f, Loader=yaml.Loader)
|
|
109
119
|
)
|
|
110
120
|
else:
|
|
111
|
-
|
|
121
|
+
worker_config = worker_config_cls()
|
|
112
122
|
worker_id = create_worker_id(worker_id_prefix or "worker")
|
|
113
123
|
logger.info(
|
|
114
124
|
"starting worker %s on queue %s, with config: %s",
|
|
115
125
|
worker_id,
|
|
116
126
|
queue,
|
|
117
|
-
|
|
127
|
+
worker_config.model_dump_json(indent=2),
|
|
118
128
|
)
|
|
119
129
|
temporal_override = dict()
|
|
120
130
|
if temporal_address is not None:
|
|
@@ -124,24 +134,18 @@ async def start(
|
|
|
124
134
|
if temporal_override:
|
|
125
135
|
temporal_config = TemporalClientConfig(**temporal_override)
|
|
126
136
|
update = {"temporal": temporal_config}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
workflows, act_names=activities, deps_name=dependencies
|
|
130
|
-
)
|
|
131
|
-
client = await bootstrap_config.to_temporal_client()
|
|
137
|
+
worker_config = safe_copy(worker_config, update=update)
|
|
138
|
+
client = await worker_config.to_temporal_client()
|
|
132
139
|
event_loop = asyncio.get_event_loop()
|
|
133
|
-
|
|
140
|
+
worker_ctx = worker_context(
|
|
134
141
|
worker_id,
|
|
135
142
|
activities=registered_acts,
|
|
136
143
|
workflows=registered_wfs,
|
|
137
144
|
dependencies=registered_deps,
|
|
138
|
-
|
|
145
|
+
worker_config=worker_config,
|
|
139
146
|
client=client,
|
|
140
147
|
event_loop=event_loop,
|
|
141
148
|
task_queue=queue,
|
|
142
|
-
)
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
except Exception as e: # noqa: BLE001
|
|
146
|
-
await worker.shutdown()
|
|
147
|
-
raise e
|
|
149
|
+
)
|
|
150
|
+
async with worker_ctx as worker:
|
|
151
|
+
await worker.is_done()
|
|
@@ -74,10 +74,6 @@ class TemporalClientConfig(BaseModel):
|
|
|
74
74
|
)
|
|
75
75
|
return self._client
|
|
76
76
|
|
|
77
|
-
# For the lru_cache
|
|
78
|
-
def __hash__(self) -> int:
|
|
79
|
-
return id(self)
|
|
80
|
-
|
|
81
77
|
|
|
82
78
|
class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
|
|
83
79
|
model_config = DS_WORKER_SETTINGS_CONFIG
|
|
@@ -17,7 +17,6 @@ from datashare_python.config import (
|
|
|
17
17
|
from datashare_python.dependencies import (
|
|
18
18
|
lifespan_es_client,
|
|
19
19
|
lifespan_task_client,
|
|
20
|
-
lifespan_temporal_client,
|
|
21
20
|
set_es_client,
|
|
22
21
|
set_event_loop,
|
|
23
22
|
set_loggers,
|
|
@@ -170,10 +169,11 @@ async def test_task_client(
|
|
|
170
169
|
|
|
171
170
|
|
|
172
171
|
@pytest.fixture(scope="session")
|
|
173
|
-
def test_temporal_client_session(
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
172
|
+
async def test_temporal_client_session(
|
|
173
|
+
test_worker_config: WorkerConfig,
|
|
174
|
+
event_loop: AbstractEventLoop, # noqa: ARG001
|
|
175
|
+
) -> TemporalClient: # noqa: ANN001
|
|
176
|
+
return await test_worker_config.to_temporal_client()
|
|
177
177
|
|
|
178
178
|
|
|
179
179
|
@pytest.fixture
|
|
@@ -21,6 +21,7 @@ EVENT_LOOP: ContextVar[AbstractEventLoop] = ContextVar("event_loop")
|
|
|
21
21
|
ES_CLIENT: ContextVar[ESClient] = ContextVar("es_client")
|
|
22
22
|
TASK_CLIENT: ContextVar[DatashareTaskClient] = ContextVar("task_client")
|
|
23
23
|
TEMPORAL_CLIENT: ContextVar[TemporalClient] = ContextVar("temporal_client")
|
|
24
|
+
WORKER_CONFIG: ContextVar[WorkerConfig] = ContextVar("worker_config")
|
|
24
25
|
|
|
25
26
|
|
|
26
27
|
def set_event_loop(event_loop: AbstractEventLoop) -> None:
|
|
@@ -40,6 +41,17 @@ def set_loggers(worker_config: WorkerConfig, worker_id: str) -> None:
|
|
|
40
41
|
logger.info("app config: %s", worker_config.model_dump_json(indent=2))
|
|
41
42
|
|
|
42
43
|
|
|
44
|
+
def set_worker_config(worker_config: WorkerConfig) -> None:
|
|
45
|
+
WORKER_CONFIG.set(worker_config)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def lifespan_worker_config() -> WorkerConfig:
|
|
49
|
+
try:
|
|
50
|
+
return WORKER_CONFIG.get()
|
|
51
|
+
except LookupError as e:
|
|
52
|
+
raise DependencyInjectionError("worker config") from e
|
|
53
|
+
|
|
54
|
+
|
|
43
55
|
async def set_es_client(worker_config: WorkerConfig) -> ESClient:
|
|
44
56
|
client = worker_config.to_es_client()
|
|
45
57
|
ES_CLIENT.set(client)
|
|
@@ -3,6 +3,8 @@ import re
|
|
|
3
3
|
from collections.abc import Callable, Iterable
|
|
4
4
|
from importlib.metadata import entry_points
|
|
5
5
|
|
|
6
|
+
from .config import WorkerConfig
|
|
7
|
+
from .dependencies import set_worker_config
|
|
6
8
|
from .types_ import ContextManagerFactory
|
|
7
9
|
from .utils import ActivityWithProgress
|
|
8
10
|
|
|
@@ -11,9 +13,11 @@ logger = logging.getLogger(__name__)
|
|
|
11
13
|
Activity = ActivityWithProgress | Callable | type
|
|
12
14
|
|
|
13
15
|
_DEPENDENCIES = "dependencies"
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
16
|
+
_WORKER_CONFIGS = "worker_configs"
|
|
17
|
+
_WORKFLOW_GROUP = "datashare.workflows"
|
|
18
|
+
_ACTIVITIES_GROUP = "datashare.activities"
|
|
19
|
+
_DEPENDENCIES_GROUP = "datashare.dependencies"
|
|
20
|
+
_WORKER_CONFIGS_GROUP = "datashare.worker_configs"
|
|
17
21
|
|
|
18
22
|
_RegisteredWorkflow = tuple[str, type]
|
|
19
23
|
_RegisteredActivity = tuple[str, Activity]
|
|
@@ -22,11 +26,16 @@ _Discovery = tuple[
|
|
|
22
26
|
Iterable[_RegisteredWorkflow] | None,
|
|
23
27
|
Iterable[_RegisteredActivity] | None,
|
|
24
28
|
_Dependencies | None,
|
|
29
|
+
type[WorkerConfig],
|
|
25
30
|
]
|
|
26
31
|
|
|
27
32
|
|
|
28
33
|
def discover(
|
|
29
|
-
wf_names: list[str] | None,
|
|
34
|
+
wf_names: list[str] | None,
|
|
35
|
+
*,
|
|
36
|
+
act_names: list[str] | None,
|
|
37
|
+
deps_name: str | None,
|
|
38
|
+
worker_config_name: str | None,
|
|
30
39
|
) -> _Discovery:
|
|
31
40
|
discovered = ""
|
|
32
41
|
wfs = None
|
|
@@ -58,11 +67,13 @@ def discover(
|
|
|
58
67
|
if wf_names:
|
|
59
68
|
msg += "workflow patterns " + ", ".join(wf_names) + " "
|
|
60
69
|
if act_names:
|
|
61
|
-
msg
|
|
70
|
+
msg += "activity patterns " + ", ".join(act_names)
|
|
62
71
|
raise ValueError(msg)
|
|
63
|
-
deps =
|
|
72
|
+
deps = []
|
|
64
73
|
if deps_name is not None:
|
|
65
74
|
deps = discover_dependencies(deps_name)
|
|
75
|
+
if set_worker_config not in deps:
|
|
76
|
+
deps.append(set_worker_config)
|
|
66
77
|
if deps:
|
|
67
78
|
n_deps = len(deps)
|
|
68
79
|
discovered += "\n"
|
|
@@ -71,13 +82,18 @@ def discover(
|
|
|
71
82
|
f"- {n_deps} dependenc{'ies' if n_deps > 1 else 'y'}:"
|
|
72
83
|
f" {', '.join(deps_names)}"
|
|
73
84
|
)
|
|
85
|
+
if worker_config_name is not None:
|
|
86
|
+
worker_config_cls = discover_worker_configs(worker_config_name)
|
|
87
|
+
discovered += f"- worker config class: {worker_config_cls}"
|
|
88
|
+
else:
|
|
89
|
+
worker_config_cls = WorkerConfig
|
|
74
90
|
logger.info("discovered:\n%s", discovered)
|
|
75
|
-
return wfs, acts, deps
|
|
91
|
+
return wfs, acts, deps, worker_config_cls
|
|
76
92
|
|
|
77
93
|
|
|
78
94
|
def discover_workflows(names: list[str]) -> list[_RegisteredWorkflow]:
|
|
79
95
|
pattern = None if not names else re.compile(rf"^{'|'.join(names)}$")
|
|
80
|
-
impls = entry_points(group=
|
|
96
|
+
impls = entry_points(group=_WORKFLOW_GROUP)
|
|
81
97
|
registered = []
|
|
82
98
|
for wf_impls in impls:
|
|
83
99
|
wf_impls = wf_impls.load() # noqa: PLW2901
|
|
@@ -93,7 +109,7 @@ def discover_workflows(names: list[str]) -> list[_RegisteredWorkflow]:
|
|
|
93
109
|
|
|
94
110
|
def discover_activities(names: list[str]) -> list[_RegisteredActivity]:
|
|
95
111
|
pattern = None if not names else re.compile(rf"^{'|'.join(names)}$")
|
|
96
|
-
impls = entry_points(group=
|
|
112
|
+
impls = entry_points(group=_ACTIVITIES_GROUP)
|
|
97
113
|
registered = []
|
|
98
114
|
for act_impls in impls:
|
|
99
115
|
act_impls = act_impls.load() # noqa: PLW2901
|
|
@@ -108,9 +124,9 @@ def discover_activities(names: list[str]) -> list[_RegisteredActivity]:
|
|
|
108
124
|
|
|
109
125
|
|
|
110
126
|
def discover_dependencies(name: str) -> _Dependencies:
|
|
111
|
-
impls = entry_points(name=_DEPENDENCIES, group=
|
|
127
|
+
impls = entry_points(name=_DEPENDENCIES, group=_DEPENDENCIES_GROUP)
|
|
112
128
|
if not impls:
|
|
113
|
-
available_impls = entry_points(group=
|
|
129
|
+
available_impls = entry_points(group=_DEPENDENCIES_GROUP)
|
|
114
130
|
msg = (
|
|
115
131
|
f'failed to find dependency: "{name}", '
|
|
116
132
|
f"available dependencies: {available_impls}"
|
|
@@ -131,6 +147,30 @@ def discover_dependencies(name: str) -> _Dependencies:
|
|
|
131
147
|
raise LookupError(msg) from e
|
|
132
148
|
|
|
133
149
|
|
|
150
|
+
def discover_worker_configs(name: str) -> type[WorkerConfig]:
|
|
151
|
+
impls = entry_points(name=_WORKER_CONFIGS, group=_WORKER_CONFIGS_GROUP)
|
|
152
|
+
if not impls:
|
|
153
|
+
available_impls = entry_points(group=_WORKER_CONFIGS_GROUP)
|
|
154
|
+
msg = (
|
|
155
|
+
f'failed to find worker config: "{name}", '
|
|
156
|
+
f"available dependencies: {available_impls}"
|
|
157
|
+
)
|
|
158
|
+
raise LookupError(msg)
|
|
159
|
+
if len(impls) > 1:
|
|
160
|
+
msg = f'found multiple worker configs for name "{name}": {impls}'
|
|
161
|
+
raise ValueError(msg)
|
|
162
|
+
deps_registry = impls[_WORKER_CONFIGS].load()
|
|
163
|
+
try:
|
|
164
|
+
return deps_registry[name]
|
|
165
|
+
except KeyError as e:
|
|
166
|
+
available = list(deps_registry)
|
|
167
|
+
msg = (
|
|
168
|
+
f'failed to find worker config for name "{name}", available worker '
|
|
169
|
+
f"configs: {available}"
|
|
170
|
+
)
|
|
171
|
+
raise LookupError(msg) from e
|
|
172
|
+
|
|
173
|
+
|
|
134
174
|
def _parse_wf_name(wf_type: type) -> str:
|
|
135
175
|
if not isinstance(wf_type, type):
|
|
136
176
|
msg = (
|
|
Binary file
|
|
@@ -43,6 +43,13 @@ documentation for more details:
|
|
|
43
43
|
_ACTIVITY_THREAD_NAME_PREFIX = "datashare-activity-worker-"
|
|
44
44
|
|
|
45
45
|
|
|
46
|
+
class DatashareWorker(Worker):
|
|
47
|
+
async def is_done(self) -> None:
|
|
48
|
+
if self._async_context_run_task is None:
|
|
49
|
+
raise ValueError("worker is not running")
|
|
50
|
+
await self._async_context_run_task
|
|
51
|
+
|
|
52
|
+
|
|
46
53
|
def datashare_worker(
|
|
47
54
|
client: TemporalClient,
|
|
48
55
|
worker_id: str,
|
|
@@ -53,7 +60,7 @@ def datashare_worker(
|
|
|
53
60
|
# Scale horizontally by default for activities, each worker processes one activity
|
|
54
61
|
# at a time
|
|
55
62
|
max_concurrent_io_activities: int = 10,
|
|
56
|
-
) ->
|
|
63
|
+
) -> DatashareWorker:
|
|
57
64
|
if workflows is None:
|
|
58
65
|
workflows = []
|
|
59
66
|
if activities is None:
|
|
@@ -77,7 +84,7 @@ def datashare_worker(
|
|
|
77
84
|
if workflows:
|
|
78
85
|
logger.warning(_SEPARATE_IO_AND_CPU_WORKERS)
|
|
79
86
|
|
|
80
|
-
return
|
|
87
|
+
return DatashareWorker(
|
|
81
88
|
client,
|
|
82
89
|
identity=worker_id,
|
|
83
90
|
workflows=workflows,
|
|
@@ -124,21 +131,21 @@ def init_activity(
|
|
|
124
131
|
|
|
125
132
|
|
|
126
133
|
@asynccontextmanager
|
|
127
|
-
async def
|
|
134
|
+
async def worker_context(
|
|
128
135
|
worker_id: str,
|
|
129
136
|
*,
|
|
130
137
|
activities: list[Callable[..., Any] | None] | None = None,
|
|
131
138
|
workflows: list[type] | None = None,
|
|
132
|
-
|
|
139
|
+
worker_config: WorkerConfig,
|
|
133
140
|
client: TemporalClient,
|
|
134
141
|
event_loop: AbstractEventLoop,
|
|
135
142
|
task_queue: str,
|
|
136
143
|
dependencies: list[ContextManagerFactory] | None = None,
|
|
137
|
-
) -> AsyncGenerator[
|
|
144
|
+
) -> AsyncGenerator[DatashareWorker, None]:
|
|
138
145
|
deps_cm = (
|
|
139
146
|
with_dependencies(
|
|
140
147
|
dependencies,
|
|
141
|
-
worker_config=
|
|
148
|
+
worker_config=worker_config,
|
|
142
149
|
worker_id=worker_id,
|
|
143
150
|
event_loop=event_loop,
|
|
144
151
|
)
|
|
@@ -159,9 +166,10 @@ async def bootstrap_worker(
|
|
|
159
166
|
workflows=workflows,
|
|
160
167
|
activities=acts,
|
|
161
168
|
task_queue=task_queue,
|
|
162
|
-
max_concurrent_io_activities=
|
|
169
|
+
max_concurrent_io_activities=worker_config.max_concurrent_io_activities,
|
|
163
170
|
)
|
|
164
|
-
|
|
171
|
+
async with worker:
|
|
172
|
+
yield worker
|
|
165
173
|
|
|
166
174
|
|
|
167
175
|
@asynccontextmanager
|
|
Binary file
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|