datashare-python 0.2.23__tar.gz → 0.2.25__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datashare_python-0.2.23 → datashare_python-0.2.25}/PKG-INFO +4 -4
- datashare_python-0.2.25/datashare_python/cli/worker.py +137 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/config.py +30 -2
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/conftest.py +11 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/constants.py +2 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/dependencies.py +27 -7
- datashare_python-0.2.25/datashare_python/discovery.py +178 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/objects.py +17 -3
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/template.py +11 -4
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/utils.py +152 -25
- datashare_python-0.2.25/datashare_python/worker-template.tar.gz +0 -0
- datashare_python-0.2.25/datashare_python/worker.py +175 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/pyproject.toml +4 -5
- datashare_python-0.2.23/datashare_python/cli/worker.py +0 -114
- datashare_python-0.2.23/datashare_python/discovery.py +0 -79
- datashare_python-0.2.23/datashare_python/worker-template.tar.gz +0 -0
- datashare_python-0.2.23/datashare_python/worker.py +0 -74
- {datashare_python-0.2.23 → datashare_python-0.2.25}/.gitignore +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/README.md +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/.gitignore +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/__init__.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/__main__.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/cli/__init__.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/cli/local.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/cli/project.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/cli/task.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/cli/utils.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/exceptions.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/local_client.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/task_client.py +0 -0
- {datashare_python-0.2.23 → datashare_python-0.2.25}/datashare_python/types_.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datashare-python
|
|
3
|
-
Version: 0.2.23
|
|
3
|
+
Version: 0.2.25
|
|
4
4
|
Summary: Manage Python tasks and local resources in Datashare
|
|
5
5
|
Project-URL: Homepage, https://icij.github.io/datashare-python/
|
|
6
6
|
Project-URL: Documentation, https://icij.github.io/datashare-python/
|
|
@@ -9,13 +9,13 @@ Project-URL: Issues, https://github.com/ICIJ/datashare-python/issues
|
|
|
9
9
|
Author-email: Clément Doumouro <cdoumouro@icij.org>, Clément Doumouro <clement.doumouro@gmail.com>, Lion Summerbell <lsummerbell@icij.org>
|
|
10
10
|
Requires-Python: <4,>=3.11
|
|
11
11
|
Requires-Dist: aiohttp~=3.11.9
|
|
12
|
-
Requires-Dist: aiostream~=0.6.4
|
|
13
12
|
Requires-Dist: alive-progress~=3.2.0
|
|
14
|
-
Requires-Dist: datashare-worker-template~=0.1
|
|
13
|
+
Requires-Dist: datashare-worker-template[ml]~=0.1
|
|
15
14
|
Requires-Dist: hatchling~=1.27.0
|
|
16
|
-
Requires-Dist: icij-common[elasticsearch]~=0.
|
|
15
|
+
Requires-Dist: icij-common[elasticsearch]~=0.8.2
|
|
17
16
|
Requires-Dist: nest-asyncio~=1.6.0
|
|
18
17
|
Requires-Dist: python-json-logger~=4.0.0
|
|
18
|
+
Requires-Dist: pyyaml~=6.0
|
|
19
19
|
Requires-Dist: temporalio~=1.23.0
|
|
20
20
|
Requires-Dist: tomlkit~=0.14.0
|
|
21
21
|
Requires-Dist: typer~=0.15.4
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import logging
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Annotated
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
import yaml
|
|
8
|
+
from icij_common.pydantic_utils import safe_copy
|
|
9
|
+
|
|
10
|
+
from datashare_python.config import TemporalClientConfig, WorkerConfig
|
|
11
|
+
from datashare_python.discovery import discover, discover_activities, discover_workflows
|
|
12
|
+
from datashare_python.worker import bootstrap_worker, create_worker_id
|
|
13
|
+
|
|
14
|
+
from .utils import AsyncTyper
|
|
15
|
+
|
|
16
|
+
_START_WORKER_HELP = "start a datashare worker"
|
|
17
|
+
|
|
18
|
+
_LIST_WORKFLOWS_HELP = "list registered workflows"
|
|
19
|
+
_LIST_WORKFLOW_NAMES_HELP = "workflow names filters (supports regexes)"
|
|
20
|
+
|
|
21
|
+
_LIST_ACTIVITIES_HELP = "list registered activities"
|
|
22
|
+
_LIST_ACTIVITY_NAMES_HELP = "activity names filters (supports regexes)"
|
|
23
|
+
|
|
24
|
+
_START_WORKER_WORKFLOWS_HELP = "workflow names run by the worker (supports regexes)"
|
|
25
|
+
_START_WORKER_ACTIVITIES_HELP = "activity names run by the worker (supports regexes)"
|
|
26
|
+
_START_WORKER_DEPS_HELP = "worker lifetime dependencies name in the registry"
|
|
27
|
+
_START_WORKER_WORKER_ID_PREFIX_HELP = "worker ID prefix"
|
|
28
|
+
_START_WORKER_CONFIG_PATH_HELP = (
|
|
29
|
+
"path to a worker config YAML file,"
|
|
30
|
+
" if not provided will load worker configuration from env variables"
|
|
31
|
+
)
|
|
32
|
+
_WORKER_QUEUE_HELP = "worker task queue"
|
|
33
|
+
_TEMPORAL_NAMESPACE_HELP = "worker temporal namespace"
|
|
34
|
+
|
|
35
|
+
_TEMPORAL_URL_HELP = "address for temporal server"
|
|
36
|
+
_NAMESPACE_HELP = "namespace name"
|
|
37
|
+
_WORKER = "worker"
|
|
38
|
+
|
|
39
|
+
worker_app = AsyncTyper(name=_WORKER)
|
|
40
|
+
|
|
41
|
+
logger = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@worker_app.async_command(help=_LIST_WORKFLOWS_HELP)
async def list_workflows(
    names: Annotated[list[str], typer.Argument(help=_LIST_WORKFLOW_NAMES_HELP)],
) -> None:
    """Print the names of the registered workflows matching ``names``.

    Args:
        names: workflow name filters, forwarded as is to ``discover_workflows``.
    """
    wf_names = [wf_name for wf_name, _ in discover_workflows(names)]
    if not wf_names:
        out = """Couldn't find any registered workflow 🤔.
Make sure your workflow plugins correctly expose workflow entry points, refer to the \
documentation to learn how to do so."""
        print(out)
        return
    listing = "\n".join(f"- {wf}" for wf in wf_names)
    # Count the workflows themselves, not the characters of the rendered listing
    out = f"Found {len(wf_names)} registered workflows:\n{listing}"
    print(out)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@worker_app.async_command(help=_LIST_ACTIVITIES_HELP)
async def list_activities(
    names: Annotated[list[str], typer.Argument(help=_LIST_ACTIVITY_NAMES_HELP)],
) -> None:
    """Print the names of the registered activities matching ``names``.

    Args:
        names: activity name filters, forwarded as is to ``discover_activities``.
    """
    act_names = [act_name for act_name, _ in discover_activities(names)]
    if not act_names:
        out = """Couldn't find any registered activity 🤔.
Make sure your activity plugins correctly expose activity entry points, refer \
to the documentation to learn how to do so."""
        print(out)
        return
    listing = "\n".join(f"- {act}" for act in act_names)
    # Count the activities themselves, not the characters of the rendered listing
    out = f"Found {len(act_names)} registered activities:\n{listing}"
    print(out)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
@worker_app.async_command(help=_START_WORKER_HELP)
async def start(
    workflows: Annotated[list[str], typer.Option(help=_START_WORKER_WORKFLOWS_HELP)],
    activities: Annotated[list[str], typer.Option(help=_START_WORKER_ACTIVITIES_HELP)],
    queue: Annotated[str, typer.Option("--queue", "-q", help=_WORKER_QUEUE_HELP)],
    dependencies: Annotated[
        str | None, typer.Option(help=_START_WORKER_DEPS_HELP)
    ] = None,
    config_path: Annotated[
        Path | None,
        typer.Option(
            "--config-path", "--config", "-c", help=_START_WORKER_CONFIG_PATH_HELP
        ),
    ] = None,
    worker_id_prefix: Annotated[
        str | None, typer.Option(help=_START_WORKER_WORKER_ID_PREFIX_HELP)
    ] = None,
    temporal_address: Annotated[
        str | None, typer.Option("--temporal-address", "-a", help=_TEMPORAL_URL_HELP)
    ] = None,
    namespace: Annotated[
        str | None,
        typer.Option("--temporal-namespace", "-ns", help=_TEMPORAL_NAMESPACE_HELP),
    ] = None,
) -> None:
    """Discover workflows, activities and dependencies, then run a worker.

    Args:
        workflows: workflow name filters passed to ``discover``.
        activities: activity name filters passed to ``discover``.
        queue: task queue the worker is bootstrapped with.
        dependencies: name of the dependencies to look up, if any.
        config_path: YAML worker config file; when omitted ``WorkerConfig()`` is
            instantiated without arguments.
        worker_id_prefix: prefix passed to ``create_worker_id``, defaults to
            ``"worker"``.
        temporal_address: overrides the ``host`` of the temporal config.
        namespace: overrides the ``namespace`` of the temporal config.
    """
    if config_path is not None:
        with config_path.open() as f:
            # NOTE: safe_load on purpose, the config is validated as plain data by
            # pydantic and the full yaml.Loader can instantiate arbitrary Python
            # objects from a crafted file
            bootstrap_config = WorkerConfig.model_validate(yaml.safe_load(f))
    else:
        bootstrap_config = WorkerConfig()
    temporal_override = {}
    if temporal_address is not None:
        temporal_override["host"] = temporal_address
    if namespace is not None:
        temporal_override["namespace"] = namespace
    if temporal_override:
        # Apply the overrides on top of the loaded temporal config rather than on a
        # default one, otherwise overriding a single value resets the other ones
        temporal_config = safe_copy(
            bootstrap_config.temporal, update=temporal_override
        )
        bootstrap_config = safe_copy(
            bootstrap_config, update={"temporal": temporal_config}
        )
    registered_wfs, registered_acts, registered_deps = discover(
        workflows, act_names=activities, deps_name=dependencies
    )
    worker_id = create_worker_id(worker_id_prefix or "worker")
    client = await bootstrap_config.to_temporal_client()
    # We are inside a coroutine, the running loop is the one the worker must use
    event_loop = asyncio.get_running_loop()
    async with bootstrap_worker(
        worker_id,
        activities=registered_acts,
        workflows=registered_wfs,
        dependencies=registered_deps,
        bootstrap_config=bootstrap_config,
        client=client,
        event_loop=event_loop,
        task_queue=queue,
    ) as worker:
        try:
            await worker.run()
        except Exception:  # noqa: BLE001
            await worker.shutdown()
            # Bare raise keeps the original traceback untouched
            raise
|
|
@@ -4,7 +4,13 @@ from icij_common.es import ESClient
|
|
|
4
4
|
from icij_common.pydantic_utils import ICIJSettings
|
|
5
5
|
from pydantic import Field, PrivateAttr
|
|
6
6
|
from pydantic_settings import SettingsConfigDict
|
|
7
|
-
from temporalio.contrib.pydantic import
|
|
7
|
+
from temporalio.contrib.pydantic import PydanticJSONPlainPayloadConverter, ToJsonOptions
|
|
8
|
+
from temporalio.converter import (
|
|
9
|
+
CompositePayloadConverter,
|
|
10
|
+
DataConverter,
|
|
11
|
+
DefaultPayloadConverter,
|
|
12
|
+
JSONPlainPayloadConverter,
|
|
13
|
+
)
|
|
8
14
|
|
|
9
15
|
import datashare_python
|
|
10
16
|
|
|
@@ -64,7 +70,7 @@ class TemporalClientConfig(BaseModel):
|
|
|
64
70
|
self._client = await TemporalClient.connect(
|
|
65
71
|
target_host=self.host,
|
|
66
72
|
namespace=self.namespace,
|
|
67
|
-
data_converter=
|
|
73
|
+
data_converter=PYDANTIC_DATA_CONVERTER,
|
|
68
74
|
)
|
|
69
75
|
return self._client
|
|
70
76
|
|
|
@@ -83,6 +89,8 @@ class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
|
|
|
83
89
|
elasticsearch: ESClientConfig = ESClientConfig()
|
|
84
90
|
temporal: TemporalClientConfig = TemporalClientConfig()
|
|
85
91
|
|
|
92
|
+
max_concurrent_io_activities: int = 5
|
|
93
|
+
|
|
86
94
|
def to_es_client(self) -> ESClient:
|
|
87
95
|
return self.elasticsearch.to_es_client(self.datashare.api_key)
|
|
88
96
|
|
|
@@ -91,3 +99,23 @@ class WorkerConfig(ICIJSettings, LogWithWorkerIDMixin, BaseModel):
|
|
|
91
99
|
|
|
92
100
|
async def to_temporal_client(self) -> TemporalClient:
|
|
93
101
|
return await self.temporal.to_client()
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class _PydanticPayloadConverter(CompositePayloadConverter):
    """Composite payload converter using a pydantic aware JSON converter.

    The default encoding converters are reused, in the same order, except for the
    plain JSON one which is swapped for a ``PydanticJSONPlainPayloadConverter``
    created with ``ToJsonOptions(exclude_unset=False)``.
    """

    def __init__(self) -> None:
        pydantic_json_converter = PydanticJSONPlainPayloadConverter(
            ToJsonOptions(exclude_unset=False)
        )
        converters = [
            pydantic_json_converter
            if isinstance(converter, JSONPlainPayloadConverter)
            else converter
            for converter in DefaultPayloadConverter.default_encoding_payload_converters
        ]
        super().__init__(*converters)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
PYDANTIC_DATA_CONVERTER = DataConverter(
|
|
120
|
+
payload_converter_class=_PydanticPayloadConverter
|
|
121
|
+
)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from asyncio import AbstractEventLoop
|
|
3
3
|
from collections.abc import AsyncGenerator, Generator, Iterator, Sequence
|
|
4
|
+
from pathlib import Path
|
|
4
5
|
|
|
5
6
|
import aiohttp
|
|
6
7
|
import pytest
|
|
@@ -50,6 +51,9 @@ _INDEX_BODY = {
|
|
|
50
51
|
"language": {"type": "keyword"},
|
|
51
52
|
"documentId": {"type": "keyword"},
|
|
52
53
|
"join": {"type": "join", "relations": {"Document": "NamedEntity"}},
|
|
54
|
+
"contentType": {"type": "keyword"},
|
|
55
|
+
"content": {"type": "text"},
|
|
56
|
+
"contentTranslated": {"type": "text"},
|
|
53
57
|
}
|
|
54
58
|
}
|
|
55
59
|
}
|
|
@@ -102,6 +106,13 @@ def test_worker_config() -> WorkerConfig:
|
|
|
102
106
|
)
|
|
103
107
|
|
|
104
108
|
|
|
109
|
+
@pytest.fixture
def test_worker_config_path(test_worker_config: WorkerConfig, tmp_path: Path) -> Path:
    """Dump the test worker config as JSON in a temp directory, return its path."""
    # tmp_path is the pathlib based fixture, the legacy tmpdir one is not a Path
    # despite what the previous annotation claimed
    config_path = tmp_path / "config.json"
    config_path.write_text(test_worker_config.model_dump_json())
    return config_path
|
|
114
|
+
|
|
115
|
+
|
|
105
116
|
@pytest.fixture(scope="session")
|
|
106
117
|
async def worker_lifetime_deps(
|
|
107
118
|
event_loop: AbstractEventLoop,
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
import inspect
|
|
1
2
|
import logging
|
|
2
3
|
from asyncio import AbstractEventLoop, iscoroutine
|
|
3
|
-
from collections.abc import AsyncGenerator
|
|
4
|
+
from collections.abc import AsyncGenerator, Callable
|
|
4
5
|
from contextlib import AsyncExitStack, asynccontextmanager
|
|
5
6
|
from contextvars import ContextVar
|
|
7
|
+
from copy import deepcopy
|
|
8
|
+
from typing import Any
|
|
6
9
|
|
|
7
10
|
from icij_common.es import ESClient
|
|
8
11
|
|
|
@@ -20,7 +23,7 @@ TASK_CLIENT: ContextVar[DatashareTaskClient] = ContextVar("task_client")
|
|
|
20
23
|
TEMPORAL_CLIENT: ContextVar[TemporalClient] = ContextVar("temporal_client")
|
|
21
24
|
|
|
22
25
|
|
|
23
|
-
def set_event_loop(event_loop: AbstractEventLoop
|
|
26
|
+
def set_event_loop(event_loop: AbstractEventLoop) -> None:
|
|
24
27
|
EVENT_LOOP.set(event_loop)
|
|
25
28
|
|
|
26
29
|
|
|
@@ -31,13 +34,13 @@ def lifespan_event_loop() -> AbstractEventLoop:
|
|
|
31
34
|
raise DependencyInjectionError("event loop") from e
|
|
32
35
|
|
|
33
36
|
|
|
34
|
-
def set_loggers(worker_config: WorkerConfig, worker_id: str
|
|
37
|
+
def set_loggers(worker_config: WorkerConfig, worker_id: str) -> None:
|
|
35
38
|
worker_config.setup_loggers(worker_id=worker_id)
|
|
36
39
|
logger.info("worker loggers ready to log 💬")
|
|
37
40
|
logger.info("app config: %s", worker_config.model_dump_json(indent=2))
|
|
38
41
|
|
|
39
42
|
|
|
40
|
-
async def set_es_client(worker_config: WorkerConfig
|
|
43
|
+
async def set_es_client(worker_config: WorkerConfig) -> ESClient:
|
|
41
44
|
client = worker_config.to_es_client()
|
|
42
45
|
ES_CLIENT.set(client)
|
|
43
46
|
return client
|
|
@@ -52,7 +55,7 @@ def lifespan_es_client() -> ESClient:
|
|
|
52
55
|
|
|
53
56
|
|
|
54
57
|
# Task client setup
|
|
55
|
-
async def set_task_client(worker_config: WorkerConfig
|
|
58
|
+
async def set_task_client(worker_config: WorkerConfig) -> DatashareTaskClient:
|
|
56
59
|
task_client = worker_config.to_task_client()
|
|
57
60
|
TASK_CLIENT.set(task_client)
|
|
58
61
|
return task_client
|
|
@@ -67,7 +70,7 @@ def lifespan_task_client() -> DatashareTaskClient:
|
|
|
67
70
|
|
|
68
71
|
|
|
69
72
|
# Temporal client setup
|
|
70
|
-
async def set_temporal_client(worker_config: WorkerConfig
|
|
73
|
+
async def set_temporal_client(worker_config: WorkerConfig) -> None:
|
|
71
74
|
client = await worker_config.to_temporal_client()
|
|
72
75
|
TEMPORAL_CLIENT.set(client)
|
|
73
76
|
|
|
@@ -86,7 +89,7 @@ async def with_dependencies(
|
|
|
86
89
|
) -> AsyncGenerator[None, None]:
|
|
87
90
|
async with AsyncExitStack() as stack:
|
|
88
91
|
for dep in dependencies:
|
|
89
|
-
cm = dep(**kwargs)
|
|
92
|
+
cm = dep(**add_missing_args(dep, dict(), **kwargs))
|
|
90
93
|
if hasattr(cm, "__aenter__"):
|
|
91
94
|
await stack.enter_async_context(cm)
|
|
92
95
|
elif hasattr(cm, "__enter__"):
|
|
@@ -94,3 +97,20 @@ async def with_dependencies(
|
|
|
94
97
|
elif iscoroutine(cm):
|
|
95
98
|
await cm
|
|
96
99
|
yield
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def add_missing_args(fn: Callable, args: dict[str, Any], **kwargs) -> dict[str, Any]:
    """Complete ``args`` with the ``kwargs`` entries matching parameters of ``fn``.

    Args:
        fn: callable whose signature lists the candidate parameter names.
        args: arguments already available, they are never overridden nor mutated.
        **kwargs: pool of extra values, an entry is picked when its name is a
            parameter of ``fn``, is absent from ``args`` and its value is not None.

    Returns:
        ``args`` itself when nothing was added, otherwise a new dict holding the
        entries of ``args`` plus the ones picked from ``kwargs``.
    """
    # We make the choice not to raise in case of missing argument here, the error will
    # be correctly raised when the function is called
    from_kwargs = {
        param_name: kwargs[param_name]
        for param_name in inspect.signature(fn).parameters
        if param_name not in args and kwargs.get(param_name) is not None
    }
    if not from_kwargs:
        return args
    # A shallow copy is enough to leave the caller's dict untouched, deep copying
    # fails on values which can't be copied (locks, clients, event loops...) and
    # would hand copies to fn only when something is added
    completed = dict(args)
    completed.update(from_kwargs)
    return completed
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import re
|
|
3
|
+
from collections.abc import Callable, Iterable
|
|
4
|
+
from importlib.metadata import entry_points
|
|
5
|
+
|
|
6
|
+
from .types_ import ContextManagerFactory
|
|
7
|
+
from .utils import ActivityWithProgress
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
Activity = ActivityWithProgress | Callable | type
|
|
12
|
+
|
|
13
|
+
_DEPENDENCIES = "dependencies"
|
|
14
|
+
_WORKFLOW_GROUPS = "datashare.workflows"
|
|
15
|
+
_ACTIVITIES_GROUPS = "datashare.activities"
|
|
16
|
+
_DEPENDENCIES_GROUPS = "datashare.dependencies"
|
|
17
|
+
|
|
18
|
+
_RegisteredWorkflow = tuple[str, type]
|
|
19
|
+
_RegisteredActivity = tuple[str, Activity]
|
|
20
|
+
_Dependencies = list[ContextManagerFactory]
|
|
21
|
+
_Discovery = tuple[
|
|
22
|
+
Iterable[_RegisteredWorkflow] | None,
|
|
23
|
+
Iterable[_RegisteredActivity] | None,
|
|
24
|
+
_Dependencies | None,
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def discover(
    wf_names: list[str] | None, *, act_names: list[str] | None, deps_name: str | None
) -> _Discovery:
    """Discover the registered workflows, activities and dependencies.

    Args:
        wf_names: workflow name filters, workflows are not looked up when None.
        act_names: activity name filters, activities are not looked up when None.
        deps_name: dependencies name forwarded to ``discover_dependencies``.

    Returns:
        The discovered workflows, activities and dependencies, each of them being
        None when nothing was found.

    Raises:
        ValueError: when neither a workflow nor an activity could be found.
    """
    summary = []
    wfs = None
    if wf_names is not None:
        discovered_wfs = discover_workflows(wf_names)
        if discovered_wfs:
            found_wf_names, wfs = zip(*discovered_wfs, strict=True)
            n_wfs = len(found_wf_names)
            summary.append(
                f"- {n_wfs} workflow{'s' if n_wfs > 1 else ''}:"
                f" {', '.join(found_wf_names)}"
            )
    acts = None
    if act_names is not None:
        discovered_acts = discover_activities(act_names)
        if discovered_acts:
            found_act_names, acts = zip(*discovered_acts, strict=True)
            n_acts = len(found_act_names)
            summary.append(
                f"- {n_acts} activit{'ies' if n_acts > 1 else 'y'}:"
                f" {', '.join(found_act_names)}"
            )
    if not acts and not wfs:
        raise ValueError("Couldn't find any registered activity or workflow.")
    deps = discover_dependencies(deps_name)
    if deps:
        n_deps = len(deps)
        # Some factories (partials, callable instances...) have no __name__, fall
        # back on their repr rather than crashing while building a log message
        deps_names = (getattr(d, "__name__", repr(d)) for d in deps)
        summary.append(
            f"- {n_deps} dependenc{'ies' if n_deps > 1 else 'y'}:"
            f" {', '.join(deps_names)}"
        )
    logger.info("discovered:\n%s", "\n".join(summary))
    return wfs, acts, deps
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def discover_workflows(names: list[str]) -> list[_RegisteredWorkflow]:
    """List the workflows exposed through the workflow entry points group.

    Args:
        names: regexes which must match the whole workflow name, a workflow is
            kept when one of them matches. All workflows are kept when empty.

    Returns:
        The ``(workflow name, workflow implementation)`` pairs which were kept.
    """
    # fullmatch applies to the alternation as a whole, whereas "^a|b$" only anchors
    # the first alternative at the start and the last one at the end
    pattern = re.compile("|".join(names)) if names else None
    registered = []
    for entry_point in entry_points(group=_WORKFLOW_GROUPS):
        wf_impls = entry_point.load()
        # An entry point exposes either a single workflow or a collection of them
        if not isinstance(wf_impls, list | tuple | set):
            wf_impls = [wf_impls]
        for wf_impl in wf_impls:
            wf_name = _parse_wf_name(wf_impl)
            if pattern is not None and not pattern.fullmatch(wf_name):
                continue
            registered.append((wf_name, wf_impl))
    return registered
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def discover_activities(names: list[str]) -> list[_RegisteredActivity]:
    """List the activities exposed through the activity entry points group.

    Args:
        names: regexes which must match the whole activity name, an activity is
            kept when one of them matches. All activities are kept when empty.

    Returns:
        The ``(activity name, activity implementation)`` pairs which were kept.
    """
    # fullmatch applies to the alternation as a whole, whereas "^a|b$" only anchors
    # the first alternative at the start and the last one at the end
    pattern = re.compile("|".join(names)) if names else None
    registered = []
    for entry_point in entry_points(group=_ACTIVITIES_GROUPS):
        act_impls = entry_point.load()
        # An entry point exposes either a single activity or a collection of them
        if not isinstance(act_impls, list | tuple | set):
            act_impls = [act_impls]
        for act_impl in act_impls:
            act_name = _parse_activity_name(act_impl)
            if pattern is not None and not pattern.fullmatch(act_name):
                continue
            registered.append((act_name, act_impl))
    return registered
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def discover_dependencies(name: str | None) -> _Dependencies | None:
    """Load dependencies from the registry exposed as an entry point.

    Args:
        name: key of the dependencies in the registry. When falsy, the registry
            must hold a single entry, which is returned.

    Returns:
        The selected dependencies, or None when no registry entry point exists and
        ``name`` is None.

    Raises:
        LookupError: when ``name`` is provided but no registry entry point exists,
            or when the registry has no entry for ``name``.
        ValueError: when several registry entry points exist, or when no name is
            provided and the registry is empty or holds several entries.
    """
    impls = entry_points(name=_DEPENDENCIES, group=_DEPENDENCIES_GROUPS)
    if not impls:
        if name is None:
            return None
        available_impls = entry_points(group=_DEPENDENCIES_GROUPS)
        msg = (
            f'failed to find dependency: "{name}", '
            f"available dependencies: {available_impls}"
        )
        raise LookupError(msg)
    if len(impls) > 1:
        # The conflict is on the registry entry point, not on the requested
        # dependencies name (which may even be None at this point)
        msg = (
            f'found multiple "{_DEPENDENCIES}" entry points in group'
            f' "{_DEPENDENCIES_GROUPS}": {impls}'
        )
        raise ValueError(msg)
    deps_registry = impls[_DEPENDENCIES].load()
    if name:
        try:
            return deps_registry[name]
        except KeyError as e:
            available = list(deps_registry)
            msg = (
                f'failed to find dependency for name "{name}", available dependencies: '
                f"{available}"
            )
            raise LookupError(msg) from e
    if not deps_registry:
        raise ValueError("empty dependency registry !")
    if len(deps_registry) > 1:
        available = ", ".join('"' + d + '"' for d in deps_registry)
        msg = (
            f"dependency registry contains multiples entries {available},"
            f" please select one by providing a name"
        )
        raise ValueError(msg)
    return next(iter(deps_registry.values()))
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _parse_wf_name(wf_type: type) -> str:
    """Return the name found in the temporal workflow definition of a class.

    Args:
        wf_type: class expected to carry a ``__temporal_workflow_definition``
            attribute.

    Raises:
        TypeError: when ``wf_type`` is not a class.
        ValueError: when the class carries no workflow definition, or when the
            definition name is None.
    """
    expected = (
        "expected registered workflow implementation to be a temporal workflow"
        " decorated with @workflow.defn(name=<name>) class, found: "
    )
    if not isinstance(wf_type, type):
        msg = f"{expected}{type(wf_type)}"
        raise TypeError(msg)

    definition = getattr(wf_type, "__temporal_workflow_definition", None)
    if definition is None:
        msg = f"{expected}{wf_type}"
        raise ValueError(msg)
    name = definition.name
    if name is not None:
        return name
    msg = (
        "missing workflow definition name, please register your workflow"
        " with an explicit name: @workflow.defn(name=<name>)"
    )
    raise ValueError(msg)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _parse_activity_name(act: Activity) -> str:
    """Return the name found in the temporal activity definition of ``act``.

    Args:
        act: object expected to carry a ``__temporal_activity_definition``
            attribute.

    Raises:
        ValueError: when ``act`` carries no activity definition, or when the
            definition name is None.
    """
    act_defn = getattr(act, "__temporal_activity_definition", None)
    if act_defn is None:
        msg = (
            f"expected registered activity implementation to be a temporal activity"
            f" decorated with @activity.defn(name=<name>), found: {act}"
        )
        raise ValueError(msg)
    if act_defn.name is None:
        msg = (
            "missing activity definition name, please register your activities"
            " with an explicit name: @activity.defn(name=<name>)"
        )
        raise ValueError(msg)
    return act_defn.name
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from collections.abc import Callable
|
|
2
|
+
from collections.abc import Awaitable, Callable
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from datetime import UTC, datetime
|
|
5
5
|
from enum import StrEnum, unique
|
|
6
|
-
from typing import Any, Literal, Self
|
|
6
|
+
from typing import Any, Literal, Self, TypeVar
|
|
7
7
|
|
|
8
8
|
from temporalio import workflow
|
|
9
9
|
|
|
@@ -23,14 +23,26 @@ from pydantic.main import IncEx
|
|
|
23
23
|
logger = logging.getLogger(__name__)
|
|
24
24
|
|
|
25
25
|
|
|
26
|
+
T = TypeVar("T")
|
|
27
|
+
Predicate = Callable[[T], bool] | Callable[[T], Awaitable[bool]]
|
|
28
|
+
|
|
29
|
+
|
|
26
30
|
class BaseModel(_BaseModel):
|
|
27
31
|
model_config = merge_configs(icij_config(), no_enum_values_config())
|
|
28
32
|
|
|
29
33
|
|
|
34
|
+
class BasePayload(_BaseModel):
|
|
35
|
+
model_config = icij_config()
|
|
36
|
+
|
|
37
|
+
|
|
30
38
|
class DatashareModel(BaseModel):
|
|
31
39
|
model_config = merge_configs(BaseModel.model_config, lowercamel_case_config())
|
|
32
40
|
|
|
33
41
|
|
|
42
|
+
class LowerCamelCaseModel(_BaseModel):
|
|
43
|
+
model_config = merge_configs(icij_config(), lowercamel_case_config())
|
|
44
|
+
|
|
45
|
+
|
|
34
46
|
@unique
|
|
35
47
|
class TaskState(StrEnum):
|
|
36
48
|
CREATED = "CREATED"
|
|
@@ -135,12 +147,14 @@ class TaskGroup:
|
|
|
135
147
|
class Document(DatashareModel):
|
|
136
148
|
id: str
|
|
137
149
|
root_document: str
|
|
138
|
-
content: str
|
|
139
150
|
language: str
|
|
151
|
+
content: str | None = None
|
|
152
|
+
content_type: str | None = None
|
|
140
153
|
tags: list[str] = Field(default_factory=list)
|
|
141
154
|
content_translated: dict[str, str] = Field(
|
|
142
155
|
default_factory=dict, alias="content_translated"
|
|
143
156
|
)
|
|
157
|
+
type: str = Field(default="Document", frozen=True)
|
|
144
158
|
|
|
145
159
|
@classmethod
|
|
146
160
|
def from_es(cls, es_doc: dict) -> Self:
|
|
@@ -71,12 +71,12 @@ def _update_pyproject_toml(
|
|
|
71
71
|
) -> dict[str, Any]:
|
|
72
72
|
pyproject_toml = deepcopy(pyproject_toml)
|
|
73
73
|
|
|
74
|
-
pyproject_toml["tool"]["uv"].pop("sources")
|
|
75
|
-
pyproject_toml["tool"]["uv"].pop("index")
|
|
74
|
+
pyproject_toml["tool"]["uv"].pop("sources", None)
|
|
75
|
+
pyproject_toml["tool"]["uv"].pop("index", None)
|
|
76
76
|
|
|
77
77
|
project = pyproject_toml["project"]
|
|
78
78
|
project["authors"] = []
|
|
79
|
-
project.pop("urls")
|
|
79
|
+
project.pop("urls", None)
|
|
80
80
|
project["dependencies"] = sorted(
|
|
81
81
|
d
|
|
82
82
|
for d in project["dependencies"]
|
|
@@ -87,7 +87,7 @@ def _update_pyproject_toml(
|
|
|
87
87
|
for d in project["dependencies"]
|
|
88
88
|
if any(d.startswith(base) for base in _BASE_DEPS)
|
|
89
89
|
)
|
|
90
|
-
project.pop("optional-dependencies")
|
|
90
|
+
project.pop("optional-dependencies", None)
|
|
91
91
|
|
|
92
92
|
entry_points = project["entry-points"]
|
|
93
93
|
|
|
@@ -105,4 +105,11 @@ def _update_pyproject_toml(
|
|
|
105
105
|
i if i != "worker_template" else package_name for i in hatch_sdist["packages"]
|
|
106
106
|
]
|
|
107
107
|
|
|
108
|
+
hatch_sdist = pyproject_toml["tool"]["hatch"]["build"]["targets"]["sdist"]
|
|
109
|
+
if "only-include" in hatch_sdist:
|
|
110
|
+
hatch_sdist["only-include"] = [
|
|
111
|
+
i if i != "worker_template" else package_name
|
|
112
|
+
for i in hatch_sdist["only-include"]
|
|
113
|
+
]
|
|
114
|
+
|
|
108
115
|
return pyproject_toml
|