datashare-python 0.1.3__tar.gz → 0.2.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datashare_python-0.2.10/.gitignore +21 -0
- datashare_python-0.2.10/PKG-INFO +20 -0
- datashare_python-0.2.10/datashare_python/.gitignore +1 -0
- {datashare_python-0.1.3 → datashare_python-0.2.10}/datashare_python/__main__.py +2 -2
- datashare_python-0.2.10/datashare_python/cli/__init__.py +52 -0
- datashare_python-0.2.10/datashare_python/cli/local.py +33 -0
- datashare_python-0.2.10/datashare_python/cli/project.py +34 -0
- datashare_python-0.1.3/datashare_python/cli/tasks.py → datashare_python-0.2.10/datashare_python/cli/task.py +17 -19
- {datashare_python-0.1.3 → datashare_python-0.2.10}/datashare_python/cli/utils.py +7 -5
- datashare_python-0.2.10/datashare_python/cli/worker.py +114 -0
- datashare_python-0.2.10/datashare_python/config.py +93 -0
- datashare_python-0.2.10/datashare_python/conftest.py +285 -0
- datashare_python-0.2.10/datashare_python/constants.py +14 -0
- datashare_python-0.2.10/datashare_python/dependencies.py +96 -0
- datashare_python-0.2.10/datashare_python/discovery.py +79 -0
- datashare_python-0.2.10/datashare_python/exceptions.py +18 -0
- datashare_python-0.2.10/datashare_python/local_client.py +69 -0
- datashare_python-0.2.10/datashare_python/objects.py +155 -0
- datashare_python-0.2.10/datashare_python/task_client.py +233 -0
- datashare_python-0.2.10/datashare_python/template.py +93 -0
- datashare_python-0.2.10/datashare_python/types_.py +29 -0
- datashare_python-0.2.10/datashare_python/utils.py +372 -0
- datashare_python-0.2.10/datashare_python/worker.py +74 -0
- datashare_python-0.2.10/pyproject.toml +75 -0
- datashare_python-0.2.10/tests/cli/test_project.py +23 -0
- datashare_python-0.2.10/tests/cli/test_tasks.py +241 -0
- datashare_python-0.2.10/tests/cli/test_worker.py +46 -0
- datashare_python-0.2.10/tests/conftest.py +14 -0
- datashare_python-0.2.10/tests/test_discovery.py +44 -0
- datashare_python-0.2.10/tests/test_object.py +28 -0
- {datashare_python-0.1.3 → datashare_python-0.2.10}/tests/test_task_client.py +89 -43
- datashare_python-0.2.10/tests/test_template.py +38 -0
- datashare_python-0.2.10/tests/test_utils.py +17 -0
- datashare_python-0.2.10/tests/test_worker.py +68 -0
- datashare_python-0.2.10/uv.lock +2899 -0
- datashare_python-0.1.3/PKG-INFO +0 -84
- datashare_python-0.1.3/README.md +0 -61
- datashare_python-0.1.3/datashare_python/app.py +0 -85
- datashare_python-0.1.3/datashare_python/cli/__init__.py +0 -30
- datashare_python-0.1.3/datashare_python/config.py +0 -60
- datashare_python-0.1.3/datashare_python/constants.py +0 -6
- datashare_python-0.1.3/datashare_python/objects.py +0 -49
- datashare_python-0.1.3/datashare_python/task_client.py +0 -124
- datashare_python-0.1.3/datashare_python/tasks/__init__.py +0 -2
- datashare_python-0.1.3/datashare_python/tasks/classify_docs.py +0 -227
- datashare_python-0.1.3/datashare_python/tasks/dependencies.py +0 -110
- datashare_python-0.1.3/datashare_python/tasks/translate_docs.py +0 -223
- datashare_python-0.1.3/datashare_python/utils.py +0 -69
- datashare_python-0.1.3/pyproject.toml +0 -81
- datashare_python-0.1.3/tests/cli/test_tasks.py +0 -193
- datashare_python-0.1.3/tests/conftest.py +0 -281
- datashare_python-0.1.3/tests/tasks/test_translate_docs.py +0 -37
- datashare_python-0.1.3/tests/test_tasks.py +0 -181
- datashare_python-0.1.3/tests/test_utils.py +0 -31
- /datashare_python-0.1.3/datashare_python/__init__.py → /datashare_python-0.2.10/README.md +0 -0
- {datashare_python-0.1.3/tests → datashare_python-0.2.10/datashare_python}/__init__.py +0 -0
- {datashare_python-0.1.3/tests/cli → datashare_python-0.2.10/tests}/__init__.py +0 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datashare-python
|
|
3
|
+
Version: 0.2.10
|
|
4
|
+
Summary: Manage Python tasks and local resources in Datashare
|
|
5
|
+
Project-URL: Homepage, https://icij.github.io/datashare-python/
|
|
6
|
+
Project-URL: Documentation, https://icij.github.io/datashare-python/
|
|
7
|
+
Project-URL: Repository, https://github.com/ICIJ/datashare-python
|
|
8
|
+
Project-URL: Issues, https://github.com/ICIJ/datashare-python/issues
|
|
9
|
+
Author-email: Clément Doumouro <cdoumouro@icij.org>, Clément Doumouro <clement.doumouro@gmail.com>, Lion Summerbell <lsummerbell@icij.org>
|
|
10
|
+
Requires-Python: <4,>=3.11
|
|
11
|
+
Requires-Dist: aiohttp~=3.11.9
|
|
12
|
+
Requires-Dist: aiostream~=0.6.4
|
|
13
|
+
Requires-Dist: alive-progress~=3.2.0
|
|
14
|
+
Requires-Dist: icij-common[elasticsearch]~=0.7.3
|
|
15
|
+
Requires-Dist: nest-asyncio~=1.6.0
|
|
16
|
+
Requires-Dist: python-json-logger~=4.0.0
|
|
17
|
+
Requires-Dist: temporalio~=1.23.0
|
|
18
|
+
Requires-Dist: tomlkit>=0.14.0
|
|
19
|
+
Requires-Dist: typer~=0.15.4
|
|
20
|
+
Requires-Dist: worker-template
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
worker-template.tar.gz
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import importlib.metadata
|
|
2
|
+
import os
|
|
3
|
+
from typing import Annotated
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
from icij_common.logging_utils import setup_loggers
|
|
7
|
+
|
|
8
|
+
import datashare_python
|
|
9
|
+
from datashare_python.cli.local import local_app
|
|
10
|
+
from datashare_python.cli.project import project_app
|
|
11
|
+
from datashare_python.cli.task import task_app
|
|
12
|
+
from datashare_python.cli.utils import AsyncTyper
|
|
13
|
+
from datashare_python.cli.worker import worker_app
|
|
14
|
+
|
|
15
|
+
cli_app = AsyncTyper(
|
|
16
|
+
context_settings={"help_option_names": ["-h", "--help"]},
|
|
17
|
+
pretty_exceptions_enable=False,
|
|
18
|
+
)
|
|
19
|
+
cli_app.add_typer(local_app)
|
|
20
|
+
cli_app.add_typer(project_app)
|
|
21
|
+
cli_app.add_typer(task_app)
|
|
22
|
+
cli_app.add_typer(worker_app)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def version_callback(value: bool) -> None:  # noqa: FBT001
    """Print the installed package version and exit when the flag is set.

    :param value: value received from the ``--version`` option
    :raises typer.Exit: once the version has been printed
    """
    if not value:
        return
    print(importlib.metadata.version(datashare_python.__name__))
    raise typer.Exit()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def pretty_exc_callback(value: bool) -> None:  # noqa: FBT001
    """Set ``TYPER_STANDARD_TRACEBACK=1`` unless pretty exceptions are requested.

    :param value: value received from the ``--pretty-exceptions`` option
    """
    if value:
        # Pretty exceptions requested: leave the environment untouched
        return
    os.environ["TYPER_STANDARD_TRACEBACK"] = "1"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@cli_app.callback()
def main(
    # Only consumed by its callback: version_callback prints the installed
    # version and raises typer.Exit when the flag is set.
    version: Annotated[  # noqa: ARG001
        bool | None,
        typer.Option("--version", callback=version_callback, is_eager=True),
    ] = None,
    *,
    # Only consumed by its callback: pretty_exc_callback sets
    # TYPER_STANDARD_TRACEBACK=1 when the flag is left off.
    pretty_exceptions: Annotated[  # noqa: ARG001
        bool,
        typer.Option(
            "--pretty-exceptions", callback=pretty_exc_callback, is_eager=True
        ),
    ] = False,
) -> None:
    """Datashare Python CLI."""
    # NOTE(review): the docstring above is presumably displayed by Typer as the
    # top-level help text, keep it short and user-facing.
    # Configure logging for the entry point module and the package loggers.
    setup_loggers(["__main__", datashare_python.__name__])
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from typing import Annotated
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from datashare_python.cli.utils import AsyncTyper
|
|
6
|
+
from datashare_python.constants import DEFAULT_NAMESPACE, DEFAULT_TEMPORAL_ADDRESS
|
|
7
|
+
from datashare_python.local_client import LocalClient
|
|
8
|
+
|
|
9
|
+
_REGISTER_NAMESPACE_HELP = "register namespace"
|
|
10
|
+
_TEMPORAL_URL_HELP = "address for temporal server"
|
|
11
|
+
_NAMESPACE_HELP = "namespace name"
|
|
12
|
+
_LOCAL = "local"
|
|
13
|
+
|
|
14
|
+
local_app = AsyncTyper(name=_LOCAL)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@local_app.async_command(help=_REGISTER_NAMESPACE_HELP)
async def register_namespace(
    namespace: Annotated[
        str, typer.Option("--namespace", "-n", help=_NAMESPACE_HELP)
    ] = DEFAULT_NAMESPACE,
    temporal_address: Annotated[
        str, typer.Option("--temporal-address", "-a", help=_TEMPORAL_URL_HELP)
    ] = DEFAULT_TEMPORAL_ADDRESS,
) -> None:
    """Register a namespace through a :class:`LocalClient`.

    :param namespace: name of the namespace to register
    :param temporal_address: address of the temporal server
    """
    await LocalClient().register_namespace(temporal_address, namespace)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Annotated
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
from datashare_python.template import init_project
|
|
8
|
+
|
|
9
|
+
from .utils import AsyncTyper, eprint
|
|
10
|
+
|
|
11
|
+
_INIT_PROJECT_HELP = "initialize a new worker project from a template"
|
|
12
|
+
_INIT_PROJECT_NAME_HELP = "name of the new worker package"
|
|
13
|
+
_INIT_PROJECT_PATH_HELP = "path where project will be created"
|
|
14
|
+
|
|
15
|
+
_PROJECT = "project"
|
|
16
|
+
|
|
17
|
+
project_app = AsyncTyper(name=_PROJECT)
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@project_app.async_command(help=_INIT_PROJECT_HELP)
async def init(
    name: Annotated[str, typer.Argument(help=_INIT_PROJECT_NAME_HELP)],
    path: Annotated[
        Path | None, typer.Option("--path", "-p", help=_INIT_PROJECT_PATH_HELP)
    ] = None,
) -> None:
    """Initialize a worker project and print the path it was given.

    Progress messages are emitted with ``eprint``; the path is the only thing
    written with ``print``.

    :param name: name of the new worker package
    :param path: where the project is created, the current directory when omitted
    """
    project_dir = Path(".") if path is None else path
    eprint(f"Initializing {name} worker project in {project_dir.absolute()}...")
    init_project(name, project_dir)
    eprint(f"Project {name} initialized !")
    print(project_dir)
|
|
@@ -4,15 +4,14 @@ import logging
|
|
|
4
4
|
import sys
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from traceback import FrameSummary, StackSummary
|
|
7
|
-
from typing import Annotated, Any
|
|
7
|
+
from typing import Annotated, Any
|
|
8
8
|
|
|
9
9
|
import typer
|
|
10
10
|
from alive_progress import alive_bar
|
|
11
|
-
from icij_worker import TaskState
|
|
12
|
-
from icij_worker.objects import READY_STATES, Task, TaskError
|
|
13
11
|
|
|
14
12
|
from datashare_python.cli.utils import AsyncTyper, eprint
|
|
15
13
|
from datashare_python.constants import PYTHON_TASK_GROUP
|
|
14
|
+
from datashare_python.objects import READY_STATES, Task, TaskError, TaskState
|
|
16
15
|
from datashare_python.task_client import DatashareTaskClient
|
|
17
16
|
|
|
18
17
|
logger = logging.getLogger(__name__)
|
|
@@ -40,16 +39,16 @@ async def start(
|
|
|
40
39
|
name: Annotated[str, typer.Argument(help=_NAME_HELP)],
|
|
41
40
|
args: Annotated[TaskArgs, typer.Argument(help=_ARGS_HELP)] = None,
|
|
42
41
|
group: Annotated[
|
|
43
|
-
|
|
42
|
+
str | None,
|
|
44
43
|
typer.Option("--group", "-g", help=_GROUP_HELP),
|
|
45
44
|
] = PYTHON_TASK_GROUP.name,
|
|
46
45
|
ds_address: Annotated[
|
|
47
46
|
str, typer.Option("--ds-address", "-a", help=_DS_URL_HELP)
|
|
48
47
|
] = DEFAULT_DS_ADDRESS,
|
|
49
48
|
ds_api_key: Annotated[
|
|
50
|
-
|
|
49
|
+
str | None, typer.Option("--ds-api-key", "-k", help=_DS_API_KEY_HELP)
|
|
51
50
|
] = None,
|
|
52
|
-
):
|
|
51
|
+
) -> None:
|
|
53
52
|
match args:
|
|
54
53
|
case str():
|
|
55
54
|
as_path = Path(name)
|
|
@@ -76,16 +75,16 @@ async def watch(
|
|
|
76
75
|
str, typer.Option("--ds-address", "-a", help=_DS_URL_HELP)
|
|
77
76
|
] = DEFAULT_DS_ADDRESS,
|
|
78
77
|
ds_api_key: Annotated[
|
|
79
|
-
|
|
78
|
+
str | None, typer.Option("--ds-api-key", "-k", help=_DS_API_KEY_HELP)
|
|
80
79
|
] = None,
|
|
81
80
|
polling_interval_s: Annotated[
|
|
82
81
|
float, typer.Option("--polling-interval-s", "-p", help=_POLLING_INTERVAL_S_HELP)
|
|
83
82
|
] = 1.0,
|
|
84
|
-
):
|
|
83
|
+
) -> None:
|
|
85
84
|
client = DatashareTaskClient(ds_address, api_key=ds_api_key)
|
|
86
85
|
async with client:
|
|
87
86
|
task = await client.get_task(task_id)
|
|
88
|
-
if task.state
|
|
87
|
+
if task.state in READY_STATES:
|
|
89
88
|
await _handle_ready(task, client, already_done=True)
|
|
90
89
|
await _handle_alive(task, client, polling_interval_s)
|
|
91
90
|
print(task_id)
|
|
@@ -98,19 +97,19 @@ async def result(
|
|
|
98
97
|
str, typer.Option("--ds-address", "-a", help=_DS_URL_HELP)
|
|
99
98
|
] = DEFAULT_DS_ADDRESS,
|
|
100
99
|
ds_api_key: Annotated[
|
|
101
|
-
|
|
100
|
+
str | None, typer.Option("--ds-api-key", "-k", help=_DS_API_KEY_HELP)
|
|
102
101
|
] = None,
|
|
103
102
|
) -> Any:
|
|
104
103
|
client = DatashareTaskClient(ds_address, api_key=ds_api_key)
|
|
105
104
|
async with client:
|
|
106
105
|
res = await client.get_task_result(task_id)
|
|
107
|
-
if isinstance(res,
|
|
106
|
+
if isinstance(res, dict | list):
|
|
108
107
|
res = json.dumps(res, indent=2)
|
|
109
108
|
print(res)
|
|
110
109
|
|
|
111
110
|
|
|
112
111
|
async def _handle_ready(
|
|
113
|
-
task: Task, client: DatashareTaskClient, already_done: bool = False
|
|
112
|
+
task: Task, client: DatashareTaskClient, *, already_done: bool = False
|
|
114
113
|
) -> None:
|
|
115
114
|
match task.state:
|
|
116
115
|
case TaskState.ERROR:
|
|
@@ -126,27 +125,26 @@ async def _handle_ready(
|
|
|
126
125
|
raise ValueError(f"Unexpected task state {task.state}")
|
|
127
126
|
|
|
128
127
|
|
|
129
|
-
async def _handle_error(task, client: DatashareTaskClient):
|
|
128
|
+
async def _handle_error(task: Task, client: DatashareTaskClient) -> None:
|
|
130
129
|
error = await client.get_task_error(task.id)
|
|
131
130
|
eprint(
|
|
132
|
-
f"Task({task.id}) failed with the following"
|
|
133
|
-
f" error:\n\n{_format_error(error)}"
|
|
131
|
+
f"Task({task.id}) failed with the following error:\n\n{_format_error(error)}"
|
|
134
132
|
)
|
|
135
133
|
eprint(f"Task({task.id}) ❌")
|
|
136
134
|
raise typer.Exit(code=1)
|
|
137
135
|
|
|
138
136
|
|
|
139
|
-
async def _handle_cancelled(task):
|
|
137
|
+
async def _handle_cancelled(task: Task) -> None:
|
|
140
138
|
eprint(f"Task({task.id}) was cancelled !")
|
|
141
139
|
eprint(f"Task({task.id}) 🛑")
|
|
142
140
|
raise typer.Exit(code=1)
|
|
143
141
|
|
|
144
142
|
|
|
145
|
-
async def _handle_already_done(task):
|
|
143
|
+
async def _handle_already_done(task: Task) -> None:
|
|
146
144
|
eprint(f"Task({task.id}) ✅ is already completed !")
|
|
147
145
|
|
|
148
146
|
|
|
149
|
-
async def _handle_done(task):
|
|
147
|
+
async def _handle_done(task: Task) -> None:
|
|
150
148
|
eprint(f"Task({task.id}) 🛬")
|
|
151
149
|
eprint(f"Task({task.id}) ✅")
|
|
152
150
|
|
|
@@ -166,7 +164,7 @@ async def _handle_alive(
|
|
|
166
164
|
task = await client.get_task(task.id)
|
|
167
165
|
task_state = task.state
|
|
168
166
|
progress = task.progress or 0.0
|
|
169
|
-
bar(progress)
|
|
167
|
+
bar(progress)
|
|
170
168
|
await asyncio.sleep(polling_interval_s)
|
|
171
169
|
if task_state in READY_STATES:
|
|
172
170
|
await _handle_ready(task, client)
|
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import concurrent.futures
|
|
3
3
|
import sys
|
|
4
|
+
from collections.abc import Callable
|
|
4
5
|
from functools import wraps
|
|
6
|
+
from typing import Any
|
|
5
7
|
|
|
6
8
|
import typer
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
class AsyncTyper(typer.Typer):
|
|
10
|
-
def async_command(self, *args, **kwargs):
|
|
11
|
-
def decorator(async_func):
|
|
12
|
+
def async_command(self, *args, **kwargs) -> Callable[[Callable], Callable]:
|
|
13
|
+
def decorator(async_func: Callable) -> Callable:
|
|
12
14
|
@wraps(async_func)
|
|
13
|
-
def sync_func(*_args, **_kwargs):
|
|
15
|
+
def sync_func(*_args, **_kwargs) -> Any:
|
|
14
16
|
res = asyncio.run(async_func(*_args, **_kwargs))
|
|
15
17
|
return res
|
|
16
18
|
|
|
@@ -20,14 +22,14 @@ class AsyncTyper(typer.Typer):
|
|
|
20
22
|
return decorator
|
|
21
23
|
|
|
22
24
|
|
|
23
|
-
def eprint(*args, **kwargs):
|
|
25
|
+
def eprint(*args, **kwargs) -> None:
    """Forward the arguments to ``print``, writing to ``sys.stderr``.

    :param args: positional arguments passed through to ``print``
    :param kwargs: keyword arguments passed through to ``print``
    """
    error_stream = sys.stderr
    print(*args, file=error_stream, **kwargs)
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
def _to_concurrent(
|
|
28
30
|
fut: asyncio.Future, loop: asyncio.AbstractEventLoop
|
|
29
31
|
) -> concurrent.futures.Future:
|
|
30
|
-
async def wait():
|
|
32
|
+
async def wait() -> None:
|
|
31
33
|
await fut
|
|
32
34
|
|
|
33
35
|
return asyncio.run_coroutine_threadsafe(wait(), loop)
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Annotated
|
|
3
|
+
|
|
4
|
+
import typer
|
|
5
|
+
|
|
6
|
+
from datashare_python.constants import DEFAULT_NAMESPACE, DEFAULT_TEMPORAL_ADDRESS
|
|
7
|
+
from datashare_python.discovery import discover_activities, discover_workflows
|
|
8
|
+
from datashare_python.types_ import TemporalClient
|
|
9
|
+
from datashare_python.worker import datashare_worker
|
|
10
|
+
|
|
11
|
+
from .utils import AsyncTyper
|
|
12
|
+
|
|
13
|
+
_START_WORKER_HELP = "start a datashare worker"
|
|
14
|
+
|
|
15
|
+
_LIST_WORKFLOWS_HELP = "list registered workflows"
|
|
16
|
+
_LIST_WORKFLOW_NAMES_HELP = "workflow names filters (supports regexes)"
|
|
17
|
+
|
|
18
|
+
_LIST_ACTIVITIES_HELP = "list registered activities"
|
|
19
|
+
_LIST_ACTIVITY_NAMES_HELP = "activity names filters (supports regexes)"
|
|
20
|
+
|
|
21
|
+
_START_WORKER_WORKFLOWS_HELP = "workflow names run by the worker (supports regexes)"
|
|
22
|
+
_START_WORKER_ACTIVITIES_HELP = "activity names run by the worker (supports regexes)"
|
|
23
|
+
_WORKER_QUEUE_HELP = "worker task queue"
|
|
24
|
+
_WORKER_MAX_ACTIVITIES_HELP = (
|
|
25
|
+
"maximum number of concurrent activities/tasks"
|
|
26
|
+
" concurrently run by the worker. Defaults to 1 to encourage horizontal scaling."
|
|
27
|
+
)
|
|
28
|
+
_TEMPORAL_NAMESPACE_HELP = "worker temporal namespace"
|
|
29
|
+
|
|
30
|
+
_TEMPORAL_URL_HELP = "address for temporal server"
|
|
31
|
+
_NAMESPACE_HELP = "namespace name"
|
|
32
|
+
_WORKER = "worker"
|
|
33
|
+
|
|
34
|
+
worker_app = AsyncTyper(name=_WORKER)
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@worker_app.async_command(help=_LIST_WORKFLOWS_HELP)
async def list_workflows(
    names: Annotated[list[str], typer.Argument(help=_LIST_WORKFLOW_NAMES_HELP)],
) -> None:
    """Print the names of the workflows returned by ``discover_workflows``.

    :param names: workflow name filters forwarded to ``discover_workflows``
    """
    wf_names = [wf_name for wf_name, _ in discover_workflows(names)]
    if not wf_names:
        out = """Couldn't find any registered workflow 🤔.
Make sure your workflow plugins correctly expose workflow entry points, refer to the \
documentation to learn how to do so."""
        print(out)
        return
    # Keep the list and the rendered listing separate so that the reported
    # count is the number of workflows, not the length of the joined string.
    listing = "\n".join(f"- {wf}" for wf in wf_names)
    out = f"Found {len(wf_names)} registered workflows:\n{listing}"
    print(out)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@worker_app.async_command(help=_LIST_ACTIVITIES_HELP)
async def list_activities(
    names: Annotated[list[str], typer.Argument(help=_LIST_ACTIVITY_NAMES_HELP)],
) -> None:
    """Print the names of the activities returned by ``discover_activities``.

    :param names: activity name filters forwarded to ``discover_activities``
    """
    act_names = [act_name for act_name, _ in discover_activities(names)]
    if not act_names:
        out = """Couldn't find any registered activity 🤔.
Make sure your activity plugins correctly expose activity entry points, refer \
to the documentation to learn how to do so."""
        print(out)
        return
    # Keep the list and the rendered listing separate so that the reported
    # count is the number of activities, not the length of the joined string.
    listing = "\n".join(f"- {act}" for act in act_names)
    out = f"Found {len(act_names)} registered activities:\n{listing}"
    print(out)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@worker_app.async_command(help=_START_WORKER_HELP)
async def start(
    workflows: Annotated[list[str], typer.Option(help=_START_WORKER_WORKFLOWS_HELP)],
    activities: Annotated[list[str], typer.Option(help=_START_WORKER_ACTIVITIES_HELP)],
    queue: Annotated[str, typer.Option("--queue", "-q", help=_WORKER_QUEUE_HELP)],
    temporal_address: Annotated[
        str, typer.Option("--temporal-address", "-a", help=_TEMPORAL_URL_HELP)
    ] = DEFAULT_TEMPORAL_ADDRESS,
    namespace: Annotated[
        str, typer.Option("--temporal-namespace", "-ns", help=_TEMPORAL_NAMESPACE_HELP)
    ] = DEFAULT_NAMESPACE,
    max_concurrent_activities: Annotated[
        int, typer.Option("--max-activities", help=_WORKER_MAX_ACTIVITIES_HELP)
    ] = 1,
) -> None:
    """Discover workflows and activities, then run a datashare worker on them.

    :param workflows: workflow name filters forwarded to ``discover_workflows``
    :param activities: activity name filters forwarded to ``discover_activities``
    :param queue: task queue passed to the worker
    :param temporal_address: address of the temporal server to connect to
    :param namespace: temporal namespace used for the connection
    :param max_concurrent_activities: concurrency limit passed to the worker
    :raises ValueError: when neither a workflow nor an activity is discovered
    """
    # Materialize the (name, object) pairs before splitting them:
    # ``a, b = zip(*pairs)`` fails to unpack when ``pairs`` is empty, which
    # made it impossible to run with workflows only or activities only and
    # hid the explicit error raised below.
    wf_pairs = list(discover_workflows(workflows))
    wf_names = tuple(wf_name for wf_name, _ in wf_pairs)
    wfs = tuple(wf for _, wf in wf_pairs)
    act_pairs = list(discover_activities(activities))
    act_names = tuple(act_name for act_name, _ in act_pairs)
    acts = tuple(act for _, act in act_pairs)
    if not acts and not wfs:
        raise ValueError("Couldn't find any registered activity or workflow.")
    summary = []
    if wf_names:
        n_wfs = len(wf_names)
        summary.append(
            f"- {n_wfs} workflow{'s' if n_wfs > 1 else ''}: {','.join(wf_names)}"
        )
    if act_names:
        n_acts = len(act_names)
        summary.append(
            f"- {n_acts} activit{'ies' if n_acts > 1 else 'y'}: {','.join(act_names)}"
        )
    registered = "\n".join(summary)
    logger.info("Starting datashare worker running:\n%s", registered)
    client = await TemporalClient.connect(temporal_address, namespace=namespace)
    worker = datashare_worker(
        client,
        workflows=wfs,
        activities=acts,
        task_queue=queue,
        max_concurrent_activities=max_concurrent_activities,
    )
    try:
        await worker.run()
    except Exception:
        # Shut the worker down before propagating; a bare ``raise`` keeps the
        # original traceback untouched.
        await worker.shutdown()
        raise
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
from typing import ClassVar
|
|
2
|
+
|
|
3
|
+
from icij_common.es import ESClient
|
|
4
|
+
from icij_common.pydantic_utils import ICIJSettings
|
|
5
|
+
from pydantic import Field, PrivateAttr
|
|
6
|
+
from pydantic_settings import SettingsConfigDict
|
|
7
|
+
from temporalio.contrib.pydantic import pydantic_data_converter
|
|
8
|
+
|
|
9
|
+
import datashare_python
|
|
10
|
+
|
|
11
|
+
from .objects import BaseModel
|
|
12
|
+
from .task_client import DatashareTaskClient
|
|
13
|
+
from .types_ import TemporalClient
|
|
14
|
+
from .utils import LogWithWorkerIDMixin
|
|
15
|
+
|
|
16
|
+
_ALL_LOGGERS = [datashare_python.__name__]
|
|
17
|
+
|
|
18
|
+
DS_WORKER_SETTINGS_CONFIG = SettingsConfigDict(
|
|
19
|
+
env_prefix="DS_WORKER_",
|
|
20
|
+
env_nested_delimiter="__",
|
|
21
|
+
nested_model_default_partial_update=True,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ESClientConfig(BaseModel):
    # Settings forwarded to the ``ESClient`` constructor by ``to_es_client``.
    address: str = "http://localhost:9200"  # used as the single entry of ``hosts``
    default_page_size: int = 1000  # forwarded as ``pagination``
    keep_alive: str = "10m"
    max_concurrency: int = 5
    max_retries: int = 0
    max_retry_wait_s: int | float = 60
    timeout_s: int | float = 60 * 5  # forwarded as ``timeout``

    def to_es_client(self, api_key: str | None = None) -> ESClient:
        """Build an ``ESClient`` from these settings.

        :param api_key: forwarded to the client as ``api_key``
        :return: the configured client
        """
        client_kwargs = {
            "hosts": [self.address],
            "pagination": self.default_page_size,
            "max_concurrency": self.max_concurrency,
            "keep_alive": self.keep_alive,
            "timeout": self.timeout_s,
            "max_retries": self.max_retries,
            "max_retry_wait_s": self.max_retry_wait_s,
            "api_key": api_key,
        }
        es_client = ESClient(**client_kwargs)
        # NOTE(review): sets a private flag on the transport; presumably this
        # skips the client's server verification - confirm against the
        # elasticsearch client version in use.
        es_client.transport._verified_elasticsearch = True
        return es_client
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class DatashareClientConfig(BaseModel):
    # Settings used to build a ``DatashareTaskClient``.
    api_key: str | None = None
    url: str = "http://datashare:8080"

    def to_task_client(self) -> DatashareTaskClient:
        """Build a ``DatashareTaskClient`` from the configured URL and API key."""
        ds_url, ds_api_key = self.url, self.api_key
        return DatashareTaskClient(ds_url, ds_api_key)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class TemporalClientConfig(BaseModel):
    # Settings used to connect a ``TemporalClient``; the connected client is
    # kept on the instance and reused by later ``to_client`` calls.
    host: str = "temporal:7233"
    namespace: str = "datashare-default"
    _client: TemporalClient | None = PrivateAttr(default=None)

    async def to_client(self) -> TemporalClient:
        """Return the temporal client, connecting on the first call only."""
        existing = self._client
        if existing is not None:
            return existing
        connected = await TemporalClient.connect(
            target_host=self.host,
            namespace=self.namespace,
            data_converter=pydantic_data_converter,
        )
        self._client = connected
        return connected

    # For the lru_cache: hash on identity
    def __hash__(self) -> int:
        return id(self)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
    # Worker settings; with DS_WORKER_SETTINGS_CONFIG, values are read from
    # environment variables prefixed with ``DS_WORKER_``, using ``__`` as the
    # nested delimiter.
    model_config = DS_WORKER_SETTINGS_CONFIG

    # NOTE(review): a ClassVar assigned a ``Field(...)`` - confirm that
    # consumers of ``loggers`` get the list of logger names rather than the
    # field descriptor object.
    loggers: ClassVar[list[str]] = Field(_ALL_LOGGERS, frozen=True)
    log_level: str = Field(default="INFO")

    # Nested client settings, each defaulting to its own defaults
    datashare: DatashareClientConfig = DatashareClientConfig()
    elasticsearch: ESClientConfig = ESClientConfig()
    temporal: TemporalClientConfig = TemporalClientConfig()

    def to_es_client(self) -> ESClient:
        """Build an ES client from the ``elasticsearch`` settings.

        The datashare API key is passed as the client's ``api_key``.
        """
        return self.elasticsearch.to_es_client(self.datashare.api_key)

    def to_task_client(self) -> DatashareTaskClient:
        """Build a task client from the ``datashare`` settings."""
        return self.datashare.to_task_client()

    async def to_temporal_client(self) -> TemporalClient:
        """Return the temporal client provided by the ``temporal`` settings."""
        return await self.temporal.to_client()
|