datashare-python 0.1.3__tar.gz → 0.2.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. datashare_python-0.2.10/.gitignore +21 -0
  2. datashare_python-0.2.10/PKG-INFO +20 -0
  3. datashare_python-0.2.10/datashare_python/.gitignore +1 -0
  4. {datashare_python-0.1.3 → datashare_python-0.2.10}/datashare_python/__main__.py +2 -2
  5. datashare_python-0.2.10/datashare_python/cli/__init__.py +52 -0
  6. datashare_python-0.2.10/datashare_python/cli/local.py +33 -0
  7. datashare_python-0.2.10/datashare_python/cli/project.py +34 -0
  8. datashare_python-0.1.3/datashare_python/cli/tasks.py → datashare_python-0.2.10/datashare_python/cli/task.py +17 -19
  9. {datashare_python-0.1.3 → datashare_python-0.2.10}/datashare_python/cli/utils.py +7 -5
  10. datashare_python-0.2.10/datashare_python/cli/worker.py +114 -0
  11. datashare_python-0.2.10/datashare_python/config.py +93 -0
  12. datashare_python-0.2.10/datashare_python/conftest.py +285 -0
  13. datashare_python-0.2.10/datashare_python/constants.py +14 -0
  14. datashare_python-0.2.10/datashare_python/dependencies.py +96 -0
  15. datashare_python-0.2.10/datashare_python/discovery.py +79 -0
  16. datashare_python-0.2.10/datashare_python/exceptions.py +18 -0
  17. datashare_python-0.2.10/datashare_python/local_client.py +69 -0
  18. datashare_python-0.2.10/datashare_python/objects.py +155 -0
  19. datashare_python-0.2.10/datashare_python/task_client.py +233 -0
  20. datashare_python-0.2.10/datashare_python/template.py +93 -0
  21. datashare_python-0.2.10/datashare_python/types_.py +29 -0
  22. datashare_python-0.2.10/datashare_python/utils.py +372 -0
  23. datashare_python-0.2.10/datashare_python/worker.py +74 -0
  24. datashare_python-0.2.10/pyproject.toml +75 -0
  25. datashare_python-0.2.10/tests/cli/test_project.py +23 -0
  26. datashare_python-0.2.10/tests/cli/test_tasks.py +241 -0
  27. datashare_python-0.2.10/tests/cli/test_worker.py +46 -0
  28. datashare_python-0.2.10/tests/conftest.py +14 -0
  29. datashare_python-0.2.10/tests/test_discovery.py +44 -0
  30. datashare_python-0.2.10/tests/test_object.py +28 -0
  31. {datashare_python-0.1.3 → datashare_python-0.2.10}/tests/test_task_client.py +89 -43
  32. datashare_python-0.2.10/tests/test_template.py +38 -0
  33. datashare_python-0.2.10/tests/test_utils.py +17 -0
  34. datashare_python-0.2.10/tests/test_worker.py +68 -0
  35. datashare_python-0.2.10/uv.lock +2899 -0
  36. datashare_python-0.1.3/PKG-INFO +0 -84
  37. datashare_python-0.1.3/README.md +0 -61
  38. datashare_python-0.1.3/datashare_python/app.py +0 -85
  39. datashare_python-0.1.3/datashare_python/cli/__init__.py +0 -30
  40. datashare_python-0.1.3/datashare_python/config.py +0 -60
  41. datashare_python-0.1.3/datashare_python/constants.py +0 -6
  42. datashare_python-0.1.3/datashare_python/objects.py +0 -49
  43. datashare_python-0.1.3/datashare_python/task_client.py +0 -124
  44. datashare_python-0.1.3/datashare_python/tasks/__init__.py +0 -2
  45. datashare_python-0.1.3/datashare_python/tasks/classify_docs.py +0 -227
  46. datashare_python-0.1.3/datashare_python/tasks/dependencies.py +0 -110
  47. datashare_python-0.1.3/datashare_python/tasks/translate_docs.py +0 -223
  48. datashare_python-0.1.3/datashare_python/utils.py +0 -69
  49. datashare_python-0.1.3/pyproject.toml +0 -81
  50. datashare_python-0.1.3/tests/cli/test_tasks.py +0 -193
  51. datashare_python-0.1.3/tests/conftest.py +0 -281
  52. datashare_python-0.1.3/tests/tasks/test_translate_docs.py +0 -37
  53. datashare_python-0.1.3/tests/test_tasks.py +0 -181
  54. datashare_python-0.1.3/tests/test_utils.py +0 -31
  55. /datashare_python-0.1.3/datashare_python/__init__.py → /datashare_python-0.2.10/README.md +0 -0
  56. {datashare_python-0.1.3/tests → datashare_python-0.2.10/datashare_python}/__init__.py +0 -0
  57. {datashare_python-0.1.3/tests/cli → datashare_python-0.2.10/tests}/__init__.py +0 -0
@@ -0,0 +1,21 @@
1
+ .idea/
2
+ # Python
3
+ *.log
4
+ venv
5
+ *.egg-info
6
+ .eggs
7
+ __pycache__
8
+ *.pytest_cache
9
+ *.pyc
10
+ build
11
+ dist
12
+ test/.env
13
+ .cache
14
+ tmp
15
+ ./*.csv
16
+ .DS_Store
17
+
18
+ # VS code
19
+ .vscode
20
+ # Doc
21
+ site
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.4
2
+ Name: datashare-python
3
+ Version: 0.2.10
4
+ Summary: Manage Python tasks and local resources in Datashare
5
+ Project-URL: Homepage, https://icij.github.io/datashare-python/
6
+ Project-URL: Documentation, https://icij.github.io/datashare-python/
7
+ Project-URL: Repository, https://github.com/ICIJ/datashare-python
8
+ Project-URL: Issues, https://github.com/ICIJ/datashare-python/issues
9
+ Author-email: Clément Doumouro <cdoumouro@icij.org>, Clément Doumouro <clement.doumouro@gmail.com>, Lion Summerbell <lsummerbell@icij.org>
10
+ Requires-Python: <4,>=3.11
11
+ Requires-Dist: aiohttp~=3.11.9
12
+ Requires-Dist: aiostream~=0.6.4
13
+ Requires-Dist: alive-progress~=3.2.0
14
+ Requires-Dist: icij-common[elasticsearch]~=0.7.3
15
+ Requires-Dist: nest-asyncio~=1.6.0
16
+ Requires-Dist: python-json-logger~=4.0.0
17
+ Requires-Dist: temporalio~=1.23.0
18
+ Requires-Dist: tomlkit>=0.14.0
19
+ Requires-Dist: typer~=0.15.4
20
+ Requires-Dist: worker-template
@@ -0,0 +1 @@
1
+ worker-template.tar.gz
@@ -1,7 +1,7 @@
1
- from datashare_python.cli import cli_app
1
+ from .cli import cli_app
2
2
 
3
3
 
4
- def main():
4
+ def main() -> None:
5
5
  cli_app()
6
6
 
7
7
 
@@ -0,0 +1,52 @@
1
+ import importlib.metadata
2
+ import os
3
+ from typing import Annotated
4
+
5
+ import typer
6
+ from icij_common.logging_utils import setup_loggers
7
+
8
+ import datashare_python
9
+ from datashare_python.cli.local import local_app
10
+ from datashare_python.cli.project import project_app
11
+ from datashare_python.cli.task import task_app
12
+ from datashare_python.cli.utils import AsyncTyper
13
+ from datashare_python.cli.worker import worker_app
14
+
15
+ cli_app = AsyncTyper(
16
+ context_settings={"help_option_names": ["-h", "--help"]},
17
+ pretty_exceptions_enable=False,
18
+ )
19
+ cli_app.add_typer(local_app)
20
+ cli_app.add_typer(project_app)
21
+ cli_app.add_typer(task_app)
22
+ cli_app.add_typer(worker_app)
23
+
24
+
25
def version_callback(value: bool) -> None:  # noqa: FBT001
    """Print the installed package version and exit when the flag is set.

    :param value: parsed value of the ``--version`` option; nothing happens
        when it is falsy
    :raises typer.Exit: after the version has been printed
    """
    if not value:
        return
    print(importlib.metadata.version(datashare_python.__name__))
    raise typer.Exit()
30
+
31
+
32
def pretty_exc_callback(value: bool) -> None:  # noqa: FBT001
    """Export ``TYPER_STANDARD_TRACEBACK=1`` unless pretty exceptions are requested.

    :param value: parsed value of the ``--pretty-exceptions`` option
    """
    if value:
        return
    # NOTE(review): presumably read by Typer to fall back to plain tracebacks;
    # confirm the variable name against the pinned Typer version
    os.environ["TYPER_STANDARD_TRACEBACK"] = "1"
35
+
36
+
37
@cli_app.callback()
def main(
    version: Annotated[  # noqa: ARG001
        bool | None,
        typer.Option("--version", callback=version_callback, is_eager=True),
    ] = None,
    *,
    pretty_exceptions: Annotated[  # noqa: ARG001
        bool,
        typer.Option(
            "--pretty-exceptions", callback=pretty_exc_callback, is_eager=True
        ),
    ] = False,
) -> None:
    """Datashare Python CLI."""
    # NOTE(review): the docstring above presumably feeds the CLI help text,
    # keep its wording stable.
    # Both options are only consumed through their eager callbacks, the body
    # just configures logging for the entry point and the package loggers.
    logger_names = ["__main__", datashare_python.__name__]
    setup_loggers(logger_names)
@@ -0,0 +1,33 @@
1
+ from typing import Annotated
2
+
3
+ import typer
4
+
5
+ from datashare_python.cli.utils import AsyncTyper
6
+ from datashare_python.constants import DEFAULT_NAMESPACE, DEFAULT_TEMPORAL_ADDRESS
7
+ from datashare_python.local_client import LocalClient
8
+
9
+ _REGISTER_NAMESPACE_HELP = "register namespace"
10
+ _TEMPORAL_URL_HELP = "address for temporal server"
11
+ _NAMESPACE_HELP = "namespace name"
12
+ _LOCAL = "local"
13
+
14
+ local_app = AsyncTyper(name=_LOCAL)
15
+
16
+
17
@local_app.async_command(help=_REGISTER_NAMESPACE_HELP)
async def register_namespace(
    namespace: Annotated[
        str, typer.Option("--namespace", "-n", help=_NAMESPACE_HELP)
    ] = DEFAULT_NAMESPACE,
    temporal_address: Annotated[
        str, typer.Option("--temporal-address", "-a", help=_TEMPORAL_URL_HELP)
    ] = DEFAULT_TEMPORAL_ADDRESS,
) -> None:
    """Register a namespace through a fresh ``LocalClient``.

    :param namespace: name of the namespace to register
    :param temporal_address: address of the temporal server to target
    """
    await LocalClient().register_namespace(temporal_address, namespace)
@@ -0,0 +1,34 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Annotated
4
+
5
+ import typer
6
+
7
+ from datashare_python.template import init_project
8
+
9
+ from .utils import AsyncTyper, eprint
10
+
11
+ _INIT_PROJECT_HELP = "initialize a new worker project from a template"
12
+ _INIT_PROJECT_NAME_HELP = "name of the new worker package"
13
+ _INIT_PROJECT_PATH_HELP = "path where project will be created"
14
+
15
+ _PROJECT = "project"
16
+
17
+ project_app = AsyncTyper(name=_PROJECT)
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@project_app.async_command(help=_INIT_PROJECT_HELP)
async def init(
    name: Annotated[str, typer.Argument(help=_INIT_PROJECT_NAME_HELP)],
    path: Annotated[
        Path | None, typer.Option("--path", "-p", help=_INIT_PROJECT_PATH_HELP)
    ] = None,
) -> None:
    """Initialize the ``name`` worker project under ``path``.

    Progress messages go to stderr, the project path is printed on stdout.

    :param name: name of the new worker package
    :param path: where to create the project, the current directory when omitted
    """
    target = Path(".") if path is None else path
    eprint(f"Initializing {name} worker project in {target.absolute()}...")
    init_project(name, target)
    eprint(f"Project {name} initialized !")
    # The path is printed exactly as received (not made absolute)
    print(target)
@@ -4,15 +4,14 @@ import logging
4
4
  import sys
5
5
  from pathlib import Path
6
6
  from traceback import FrameSummary, StackSummary
7
- from typing import Annotated, Any, Optional
7
+ from typing import Annotated, Any
8
8
 
9
9
  import typer
10
10
  from alive_progress import alive_bar
11
- from icij_worker import TaskState
12
- from icij_worker.objects import READY_STATES, Task, TaskError
13
11
 
14
12
  from datashare_python.cli.utils import AsyncTyper, eprint
15
13
  from datashare_python.constants import PYTHON_TASK_GROUP
14
+ from datashare_python.objects import READY_STATES, Task, TaskError, TaskState
16
15
  from datashare_python.task_client import DatashareTaskClient
17
16
 
18
17
  logger = logging.getLogger(__name__)
@@ -40,16 +39,16 @@ async def start(
40
39
  name: Annotated[str, typer.Argument(help=_NAME_HELP)],
41
40
  args: Annotated[TaskArgs, typer.Argument(help=_ARGS_HELP)] = None,
42
41
  group: Annotated[
43
- Optional[str],
42
+ str | None,
44
43
  typer.Option("--group", "-g", help=_GROUP_HELP),
45
44
  ] = PYTHON_TASK_GROUP.name,
46
45
  ds_address: Annotated[
47
46
  str, typer.Option("--ds-address", "-a", help=_DS_URL_HELP)
48
47
  ] = DEFAULT_DS_ADDRESS,
49
48
  ds_api_key: Annotated[
50
- Optional[str], typer.Option("--ds-api-key", "-k", help=_DS_API_KEY_HELP)
49
+ str | None, typer.Option("--ds-api-key", "-k", help=_DS_API_KEY_HELP)
51
50
  ] = None,
52
- ):
51
+ ) -> None:
53
52
  match args:
54
53
  case str():
55
54
  as_path = Path(name)
@@ -76,16 +75,16 @@ async def watch(
76
75
  str, typer.Option("--ds-address", "-a", help=_DS_URL_HELP)
77
76
  ] = DEFAULT_DS_ADDRESS,
78
77
  ds_api_key: Annotated[
79
- Optional[str], typer.Option("--ds-api-key", "-k", help=_DS_API_KEY_HELP)
78
+ str | None, typer.Option("--ds-api-key", "-k", help=_DS_API_KEY_HELP)
80
79
  ] = None,
81
80
  polling_interval_s: Annotated[
82
81
  float, typer.Option("--polling-interval-s", "-p", help=_POLLING_INTERVAL_S_HELP)
83
82
  ] = 1.0,
84
- ):
83
+ ) -> None:
85
84
  client = DatashareTaskClient(ds_address, api_key=ds_api_key)
86
85
  async with client:
87
86
  task = await client.get_task(task_id)
88
- if task.state is READY_STATES:
87
+ if task.state in READY_STATES:
89
88
  await _handle_ready(task, client, already_done=True)
90
89
  await _handle_alive(task, client, polling_interval_s)
91
90
  print(task_id)
@@ -98,19 +97,19 @@ async def result(
98
97
  str, typer.Option("--ds-address", "-a", help=_DS_URL_HELP)
99
98
  ] = DEFAULT_DS_ADDRESS,
100
99
  ds_api_key: Annotated[
101
- Optional[str], typer.Option("--ds-api-key", "-k", help=_DS_API_KEY_HELP)
100
+ str | None, typer.Option("--ds-api-key", "-k", help=_DS_API_KEY_HELP)
102
101
  ] = None,
103
102
  ) -> Any:
104
103
  client = DatashareTaskClient(ds_address, api_key=ds_api_key)
105
104
  async with client:
106
105
  res = await client.get_task_result(task_id)
107
- if isinstance(res, (dict, list)):
106
+ if isinstance(res, dict | list):
108
107
  res = json.dumps(res, indent=2)
109
108
  print(res)
110
109
 
111
110
 
112
111
  async def _handle_ready(
113
- task: Task, client: DatashareTaskClient, already_done: bool = False
112
+ task: Task, client: DatashareTaskClient, *, already_done: bool = False
114
113
  ) -> None:
115
114
  match task.state:
116
115
  case TaskState.ERROR:
@@ -126,27 +125,26 @@ async def _handle_ready(
126
125
  raise ValueError(f"Unexpected task state {task.state}")
127
126
 
128
127
 
129
- async def _handle_error(task, client: DatashareTaskClient):
128
+ async def _handle_error(task: Task, client: DatashareTaskClient) -> None:
130
129
  error = await client.get_task_error(task.id)
131
130
  eprint(
132
- f"Task({task.id}) failed with the following"
133
- f" error:\n\n{_format_error(error)}"
131
+ f"Task({task.id}) failed with the following error:\n\n{_format_error(error)}"
134
132
  )
135
133
  eprint(f"Task({task.id}) ❌")
136
134
  raise typer.Exit(code=1)
137
135
 
138
136
 
139
- async def _handle_cancelled(task):
137
+ async def _handle_cancelled(task: Task) -> None:
140
138
  eprint(f"Task({task.id}) was cancelled !")
141
139
  eprint(f"Task({task.id}) 🛑")
142
140
  raise typer.Exit(code=1)
143
141
 
144
142
 
145
- async def _handle_already_done(task):
143
+ async def _handle_already_done(task: Task) -> None:
146
144
  eprint(f"Task({task.id}) ✅ is already completed !")
147
145
 
148
146
 
149
- async def _handle_done(task):
147
+ async def _handle_done(task: Task) -> None:
150
148
  eprint(f"Task({task.id}) 🛬")
151
149
  eprint(f"Task({task.id}) ✅")
152
150
 
@@ -166,7 +164,7 @@ async def _handle_alive(
166
164
  task = await client.get_task(task.id)
167
165
  task_state = task.state
168
166
  progress = task.progress or 0.0
169
- bar(progress) # pylint: disable=not-callable
167
+ bar(progress)
170
168
  await asyncio.sleep(polling_interval_s)
171
169
  if task_state in READY_STATES:
172
170
  await _handle_ready(task, client)
@@ -1,16 +1,18 @@
1
1
  import asyncio
2
2
  import concurrent.futures
3
3
  import sys
4
+ from collections.abc import Callable
4
5
  from functools import wraps
6
+ from typing import Any
5
7
 
6
8
  import typer
7
9
 
8
10
 
9
11
  class AsyncTyper(typer.Typer):
10
- def async_command(self, *args, **kwargs):
11
- def decorator(async_func):
12
+ def async_command(self, *args, **kwargs) -> Callable[[Callable], Callable]:
13
+ def decorator(async_func: Callable) -> Callable:
12
14
  @wraps(async_func)
13
- def sync_func(*_args, **_kwargs):
15
+ def sync_func(*_args, **_kwargs) -> Any:
14
16
  res = asyncio.run(async_func(*_args, **_kwargs))
15
17
  return res
16
18
 
@@ -20,14 +22,14 @@ class AsyncTyper(typer.Typer):
20
22
  return decorator
21
23
 
22
24
 
23
- def eprint(*args, **kwargs):
25
+ def eprint(*args, **kwargs) -> None:
24
26
  print(*args, file=sys.stderr, **kwargs)
25
27
 
26
28
 
27
29
  def _to_concurrent(
28
30
  fut: asyncio.Future, loop: asyncio.AbstractEventLoop
29
31
  ) -> concurrent.futures.Future:
30
- async def wait():
32
+ async def wait() -> None:
31
33
  await fut
32
34
 
33
35
  return asyncio.run_coroutine_threadsafe(wait(), loop)
@@ -0,0 +1,114 @@
1
+ import logging
2
+ from typing import Annotated
3
+
4
+ import typer
5
+
6
+ from datashare_python.constants import DEFAULT_NAMESPACE, DEFAULT_TEMPORAL_ADDRESS
7
+ from datashare_python.discovery import discover_activities, discover_workflows
8
+ from datashare_python.types_ import TemporalClient
9
+ from datashare_python.worker import datashare_worker
10
+
11
+ from .utils import AsyncTyper
12
+
13
+ _START_WORKER_HELP = "start a datashare worker"
14
+
15
+ _LIST_WORKFLOWS_HELP = "list registered workflows"
16
+ _LIST_WORKFLOW_NAMES_HELP = "workflow names filters (supports regexes)"
17
+
18
+ _LIST_ACTIVITIES_HELP = "list registered activities"
19
+ _LIST_ACTIVITY_NAMES_HELP = "activity names filters (supports regexes)"
20
+
21
+ _START_WORKER_WORKFLOWS_HELP = "workflow names run by the worker (supports regexes)"
22
+ _START_WORKER_ACTIVITIES_HELP = "activity names run by the worker (supports regexes)"
23
+ _WORKER_QUEUE_HELP = "worker task queue"
24
+ _WORKER_MAX_ACTIVITIES_HELP = (
25
+ "maximum number of concurrent activities/tasks"
26
+ " concurrently run by the worker. Defaults to 1 to encourage horizontal scaling."
27
+ )
28
+ _TEMPORAL_NAMESPACE_HELP = "worker temporal namespace"
29
+
30
+ _TEMPORAL_URL_HELP = "address for temporal server"
31
+ _NAMESPACE_HELP = "namespace name"
32
+ _WORKER = "worker"
33
+
34
+ worker_app = AsyncTyper(name=_WORKER)
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
@worker_app.async_command(help=_LIST_WORKFLOWS_HELP)
async def list_workflows(
    names: Annotated[list[str], typer.Argument(help=_LIST_WORKFLOW_NAMES_HELP)],
) -> None:
    """Print the names of the discovered workflows matching ``names``.

    :param names: workflow name filters forwarded to ``discover_workflows``
    """
    wf_names = [wf_name for wf_name, _ in discover_workflows(names)]
    if not wf_names:
        out = """Couldn't find any registered workflow 🤔.
Make sure your workflow plugins correctly expose workflow entry points, refer to the \
documentation to learn how to do so."""
        print(out)
        return
    # Count the names themselves: measuring the joined listing would report a
    # number of characters instead of a number of workflows
    listing = "\n".join(f"- {wf}" for wf in wf_names)
    print(f"Found {len(wf_names)} registered workflows:\n{listing}")
53
+
54
+
55
@worker_app.async_command(help=_LIST_ACTIVITIES_HELP)
async def list_activities(
    names: Annotated[list[str], typer.Argument(help=_LIST_ACTIVITY_NAMES_HELP)],
) -> None:
    """Print the names of the discovered activities matching ``names``.

    :param names: activity name filters forwarded to ``discover_activities``
    """
    act_names = [act_name for act_name, _ in discover_activities(names)]
    if not act_names:
        out = """Couldn't find any registered activity 🤔.
Make sure your activity plugins correctly expose activity entry points, refer \
to the documentation to learn how to do so."""
        print(out)
        return
    # Count the names themselves: measuring the joined listing would report a
    # number of characters instead of a number of activities
    listing = "\n".join(f"- {act}" for act in act_names)
    print(f"Found {len(act_names)} registered activities:\n{listing}")
69
+
70
+
71
@worker_app.async_command(help=_START_WORKER_HELP)
async def start(
    workflows: Annotated[list[str], typer.Option(help=_START_WORKER_WORKFLOWS_HELP)],
    activities: Annotated[list[str], typer.Option(help=_START_WORKER_ACTIVITIES_HELP)],
    queue: Annotated[str, typer.Option("--queue", "-q", help=_WORKER_QUEUE_HELP)],
    temporal_address: Annotated[
        str, typer.Option("--temporal-address", "-a", help=_TEMPORAL_URL_HELP)
    ] = DEFAULT_TEMPORAL_ADDRESS,
    namespace: Annotated[
        str, typer.Option("--temporal-namespace", "-ns", help=_TEMPORAL_NAMESPACE_HELP)
    ] = DEFAULT_NAMESPACE,
    max_concurrent_activities: Annotated[
        int, typer.Option("--max-activities", help=_WORKER_MAX_ACTIVITIES_HELP)
    ] = 1,
) -> None:
    """Discover the requested workflows and activities and run a worker for them.

    :param workflows: workflow name filters forwarded to ``discover_workflows``
    :param activities: activity name filters forwarded to ``discover_activities``
    :param queue: task queue handed to the worker
    :param temporal_address: address used to connect the temporal client
    :param namespace: namespace used to connect the temporal client
    :param max_concurrent_activities: forwarded as is to ``datashare_worker``
    :raises ValueError: when neither a workflow nor an activity was discovered
    """
    # Materialize the (name, object) pairs before splitting them: unpacking
    # ``zip(*pairs)`` into two names fails as soon as one discovery is empty,
    # which used to hide the explicit error below and to prevent starting a
    # worker running only workflows or only activities
    discovered_wfs = list(discover_workflows(workflows))
    discovered_acts = list(discover_activities(activities))
    if not discovered_wfs and not discovered_acts:
        raise ValueError("Couldn't find any registered activity or workflow.")
    wf_names = tuple(wf_name for wf_name, _ in discovered_wfs)
    wfs = tuple(wf for _, wf in discovered_wfs)
    act_names = tuple(act_name for act_name, _ in discovered_acts)
    acts = tuple(act for _, act in discovered_acts)

    summary = []
    if wf_names:
        n_wfs = len(wf_names)
        summary.append(
            f"- {n_wfs} workflow{'s' if n_wfs > 1 else ''}: {','.join(wf_names)}"
        )
    if act_names:
        n_acts = len(act_names)
        summary.append(
            f"- {n_acts} activit{'ies' if n_acts > 1 else 'y'}: {','.join(act_names)}"
        )
    logger.info("Starting datashare worker running:\n%s", "\n".join(summary))

    client = await TemporalClient.connect(temporal_address, namespace=namespace)
    worker = datashare_worker(
        client,
        workflows=wfs,
        activities=acts,
        task_queue=queue,
        max_concurrent_activities=max_concurrent_activities,
    )
    try:
        await worker.run()
    except Exception:  # noqa: BLE001
        # Shut the worker down before propagating; the bare ``raise`` keeps the
        # original traceback untouched
        await worker.shutdown()
        raise
@@ -0,0 +1,93 @@
1
+ from typing import ClassVar
2
+
3
+ from icij_common.es import ESClient
4
+ from icij_common.pydantic_utils import ICIJSettings
5
+ from pydantic import Field, PrivateAttr
6
+ from pydantic_settings import SettingsConfigDict
7
+ from temporalio.contrib.pydantic import pydantic_data_converter
8
+
9
+ import datashare_python
10
+
11
+ from .objects import BaseModel
12
+ from .task_client import DatashareTaskClient
13
+ from .types_ import TemporalClient
14
+ from .utils import LogWithWorkerIDMixin
15
+
16
+ _ALL_LOGGERS = [datashare_python.__name__]
17
+
18
+ DS_WORKER_SETTINGS_CONFIG = SettingsConfigDict(
19
+ env_prefix="DS_WORKER_",
20
+ env_nested_delimiter="__",
21
+ nested_model_default_partial_update=True,
22
+ )
23
+
24
+
25
# Settings used to build an ``ESClient`` (see ``to_es_client``).
class ESClientConfig(BaseModel):
    address: str = "http://localhost:9200"
    default_page_size: int = 1000
    keep_alive: str = "10m"
    max_concurrency: int = 5
    max_retries: int = 0
    max_retry_wait_s: int | float = 60
    timeout_s: int | float = 60 * 5

    def to_es_client(self, api_key: str | None = None) -> ESClient:
        """Build an ``ESClient`` from these settings.

        :param api_key: optional API key forwarded to the client
        :return: the configured client
        """
        es_kwargs = {
            "hosts": [self.address],
            "pagination": self.default_page_size,
            "max_concurrency": self.max_concurrency,
            "keep_alive": self.keep_alive,
            "timeout": self.timeout_s,
            "max_retries": self.max_retries,
            "max_retry_wait_s": self.max_retry_wait_s,
            "api_key": api_key,
        }
        es_client = ESClient(**es_kwargs)
        # NOTE(review): presumably skips the client's Elasticsearch product
        # verification; confirm against the elasticsearch transport in use
        es_client.transport._verified_elasticsearch = True
        return es_client
47
+
48
+
49
# Settings used to build a ``DatashareTaskClient`` (see ``to_task_client``).
class DatashareClientConfig(BaseModel):
    api_key: str | None = None
    url: str = "http://datashare:8080"

    def to_task_client(self) -> DatashareTaskClient:
        """Build a ``DatashareTaskClient`` targeting ``url``.

        :return: the task client, authenticated with ``api_key`` when set
        """
        # Pass the key by keyword, like the other ``DatashareTaskClient`` call
        # sites, so it can't silently bind to another positional parameter
        return DatashareTaskClient(self.url, api_key=self.api_key)
55
+
56
+
57
# Settings used to connect a ``TemporalClient``; the connected client is kept
# on the instance and reused by later ``to_client`` calls.
class TemporalClientConfig(BaseModel):
    host: str = "temporal:7233"
    namespace: str = "datashare-default"
    _client: TemporalClient | None = PrivateAttr(default=None)

    async def to_client(self) -> TemporalClient:
        """Return the temporal client, connecting on the first call only.

        :return: the client connected to ``host`` for ``namespace``
        """
        if self._client is not None:
            return self._client
        self._client = await TemporalClient.connect(
            target_host=self.host,
            namespace=self.namespace,
            data_converter=pydantic_data_converter,
        )
        return self._client

    # For the lru_cache
    def __hash__(self) -> int:
        # Identity based hash: two configs with equal fields hash differently
        return id(self)
74
+
75
+
76
# Worker settings, configured through ``DS_WORKER_SETTINGS_CONFIG`` and
# exposing factories for the clients a worker needs.
class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
    model_config = DS_WORKER_SETTINGS_CONFIG

    # NOTE(review): annotated as a ClassVar yet assigned a ``Field(...)``;
    # confirm that the consumers of ``loggers`` expect this
    loggers: ClassVar[list[str]] = Field(_ALL_LOGGERS, frozen=True)
    log_level: str = Field(default="INFO")

    datashare: DatashareClientConfig = DatashareClientConfig()
    elasticsearch: ESClientConfig = ESClientConfig()
    temporal: TemporalClientConfig = TemporalClientConfig()

    def to_es_client(self) -> ESClient:
        """Build the ES client, reusing the Datashare API key."""
        api_key = self.datashare.api_key
        return self.elasticsearch.to_es_client(api_key)

    def to_task_client(self) -> DatashareTaskClient:
        """Build the Datashare task client from the ``datashare`` settings."""
        task_client = self.datashare.to_task_client()
        return task_client

    async def to_temporal_client(self) -> TemporalClient:
        """Return the temporal client built from the ``temporal`` settings."""
        temporal_client = await self.temporal.to_client()
        return temporal_client