runnable 0.50.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- extensions/README.md +0 -0
- extensions/__init__.py +0 -0
- extensions/catalog/README.md +0 -0
- extensions/catalog/any_path.py +214 -0
- extensions/catalog/file_system.py +52 -0
- extensions/catalog/minio.py +72 -0
- extensions/catalog/pyproject.toml +14 -0
- extensions/catalog/s3.py +11 -0
- extensions/job_executor/README.md +0 -0
- extensions/job_executor/__init__.py +236 -0
- extensions/job_executor/emulate.py +70 -0
- extensions/job_executor/k8s.py +553 -0
- extensions/job_executor/k8s_job_spec.yaml +37 -0
- extensions/job_executor/local.py +35 -0
- extensions/job_executor/local_container.py +161 -0
- extensions/job_executor/pyproject.toml +16 -0
- extensions/nodes/README.md +0 -0
- extensions/nodes/__init__.py +0 -0
- extensions/nodes/conditional.py +301 -0
- extensions/nodes/fail.py +78 -0
- extensions/nodes/loop.py +394 -0
- extensions/nodes/map.py +477 -0
- extensions/nodes/parallel.py +281 -0
- extensions/nodes/pyproject.toml +15 -0
- extensions/nodes/stub.py +93 -0
- extensions/nodes/success.py +78 -0
- extensions/nodes/task.py +156 -0
- extensions/pipeline_executor/README.md +0 -0
- extensions/pipeline_executor/__init__.py +871 -0
- extensions/pipeline_executor/argo.py +1266 -0
- extensions/pipeline_executor/emulate.py +119 -0
- extensions/pipeline_executor/local.py +226 -0
- extensions/pipeline_executor/local_container.py +369 -0
- extensions/pipeline_executor/mocked.py +159 -0
- extensions/pipeline_executor/pyproject.toml +16 -0
- extensions/run_log_store/README.md +0 -0
- extensions/run_log_store/__init__.py +0 -0
- extensions/run_log_store/any_path.py +100 -0
- extensions/run_log_store/chunked_fs.py +122 -0
- extensions/run_log_store/chunked_minio.py +141 -0
- extensions/run_log_store/file_system.py +91 -0
- extensions/run_log_store/generic_chunked.py +549 -0
- extensions/run_log_store/minio.py +114 -0
- extensions/run_log_store/pyproject.toml +15 -0
- extensions/secrets/README.md +0 -0
- extensions/secrets/dotenv.py +62 -0
- extensions/secrets/pyproject.toml +15 -0
- runnable/__init__.py +108 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +484 -0
- runnable/context.py +730 -0
- runnable/datastore.py +1058 -0
- runnable/defaults.py +159 -0
- runnable/entrypoints.py +390 -0
- runnable/exceptions.py +137 -0
- runnable/executor.py +561 -0
- runnable/gantt.py +1646 -0
- runnable/graph.py +501 -0
- runnable/names.py +546 -0
- runnable/nodes.py +593 -0
- runnable/parameters.py +217 -0
- runnable/pickler.py +96 -0
- runnable/sdk.py +1277 -0
- runnable/secrets.py +92 -0
- runnable/tasks.py +1268 -0
- runnable/telemetry.py +142 -0
- runnable/utils.py +423 -0
- runnable-0.50.0.dist-info/METADATA +189 -0
- runnable-0.50.0.dist-info/RECORD +72 -0
- runnable-0.50.0.dist-info/WHEEL +4 -0
- runnable-0.50.0.dist-info/entry_points.txt +53 -0
- runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Dict, Union
|
|
5
|
+
|
|
6
|
+
from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
|
|
7
|
+
from runnable import defaults, utils
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
    """
    File system run log store that chunks the run log into thread safe chunks.

    Instead of one JSON file per run, every entity of the run log is stored
    as its own file under ``<log_folder>/<run_id>/``, which enables parallel
    executions to write without clobbering each other.
    """

    # Registered name of this run log store implementation.
    service_name: str = "chunked-fs"
    # Root folder under which one sub-folder per run_id is created.
    log_folder: str = defaults.LOG_LOCATION_FOLDER

    def get_summary(self) -> Dict[str, Any]:
        """Return a short human-readable summary of this store's configuration."""
        summary = {"Type": self.service_name, "Location": self.log_folder}

        return summary

    def _exists(self, run_id: str, name: str) -> bool:
        """
        Check if a file exists in the log folder.

        Args:
            run_id (str): The run id
            name (str): The exact file name to check (with or without .json)

        Returns:
            bool: True if file exists, False otherwise
        """
        log_folder = self.log_folder_with_run_id(run_id=run_id)
        file_path = log_folder / self.safe_suffix_json(name)
        return file_path.exists()

    def _list_branch_logs(self, run_id: str) -> list[str]:
        """
        List all branch log file names for a run_id.

        Args:
            run_id (str): The run id

        Returns:
            list[str]: List of branch log file names without .json extension
        """
        log_folder = self.log_folder_with_run_id(run_id=run_id)
        if not log_folder.exists():
            return []

        # Branch logs are stored as "BranchLog-<branch name>.json".
        branch_files = log_folder.glob("BranchLog-*.json")
        # Path.stem drops the directory part and the trailing ".json" suffix.
        return [f.stem for f in branch_files]

    def log_folder_with_run_id(self, run_id: str) -> Path:
        """
        Utility function to get the log folder for a run id.

        Args:
            run_id (str): The run id

        Returns:
            Path: The path to the log folder with the run id
        """
        return Path(self.log_folder) / run_id

    def safe_suffix_json(self, name: Union[Path, str]) -> str:
        """
        Safely attach a .json suffix to a file name.

        Args:
            name (Union[Path, str]): The name of the file with or without the
                .json suffix

        Returns:
            str: The name of the file guaranteed to end with .json
        """
        # Check the full ".json" suffix: the previous bare "json" check would
        # wrongly leave a name such as "myjson" without an extension, which
        # the "BranchLog-*.json" glob above would then never find.
        if str(name).endswith(".json"):
            return str(name)

        return str(name) + ".json"

    def _store(self, run_id: str, contents: dict, name: str, insert=False):
        """
        Store the contents against the name in the folder.

        Args:
            run_id (str): The run id
            contents (dict): The dict to store
            name (str): The name to store as (without path)
            insert (bool): Unused, kept for interface compatibility
        """
        log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
        file_path = log_folder_with_run_id / name

        utils.safe_make_dir(log_folder_with_run_id)

        with open(self.safe_suffix_json(file_path), "w") as fw:
            json.dump(contents, fw, ensure_ascii=True, indent=4)

    def _retrieve(self, run_id: str, name: str) -> dict:
        """
        Retrieve the contents stored against the name in the folder.

        Args:
            run_id (str): The run id
            name (str): the name of the file to retrieve (without path)

        Returns:
            dict: The contents
        """
        log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
        file_path = log_folder_with_run_id / name

        with open(self.safe_suffix_json(file_path), "r") as fr:
            contents = json.load(fr)

        return contents
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
|
|
6
|
+
from cloudpathlib import S3Client, S3Path
|
|
7
|
+
from pydantic import Field, SecretStr
|
|
8
|
+
|
|
9
|
+
from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
|
|
10
|
+
from runnable import defaults
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@lru_cache
def get_minio_client(
    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
) -> S3Client:
    """Build (and memoize per credential set) an S3 client for a MinIO endpoint."""
    client = S3Client(
        endpoint_url=endpoint_url,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key,
    )
    return client
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ChunkedMinioRunLogStore(ChunkedRunLogStore):
    """
    Minio backed run log store that chunks the run log into thread safe chunks.

    Every entity of the run log is stored as its own object under
    ``s3://<bucket>/<run_id>/``, which enables parallel executions.
    """

    # Registered name of this run log store implementation.
    service_name: str = "chunked-minio"
    # Defaults match a local MinIO deployment with its stock credentials.
    endpoint_url: str = Field(default="http://localhost:9002")
    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
    bucket: str = Field(default="runnable/run-logs")

    def get_summary(self) -> Dict[str, Any]:
        """Return a short human-readable summary of this store's configuration."""
        summary = {
            "Type": self.service_name,
            "Location": f"{self.endpoint_url}/{self.bucket}",
        }

        return summary

    def get_run_log_bucket(self, run_id: str = "") -> S3Path:
        """
        Return the S3 folder that holds the chunks of a run log.

        Args:
            run_id (str): The run id; falls back to the active context's run
                id when empty, preserving the original zero-argument call.

        Returns:
            S3Path: s3://<bucket>/<run_id>/
        """
        run_id = run_id or self._context.run_id

        return S3Path(
            f"s3://{self.bucket}/{run_id}/",
            client=get_minio_client(
                self.endpoint_url,
                self.aws_access_key_id.get_secret_value(),
                self.aws_secret_access_key.get_secret_value(),
            ),
        )

    def _exists(self, run_id: str, name: str) -> bool:
        """
        Check if an object exists in the Minio bucket.

        Args:
            run_id (str): The run id
            name (str): The exact object name to check

        Returns:
            bool: True if the object exists, False otherwise
        """
        # Honor the run_id argument instead of silently using the context's
        # run id, matching the file-system implementation's behavior.
        run_log_bucket = self.get_run_log_bucket(run_id=run_id)
        file_path = run_log_bucket / name
        return file_path.exists()

    def _list_branch_logs(self, run_id: str) -> list[str]:
        """
        List all branch log object names for a run_id.

        Args:
            run_id (str): The run id

        Returns:
            list[str]: List of branch log names (e.g., ["BranchLog-map.1", "BranchLog-map.2"])
        """
        run_log_bucket = self.get_run_log_bucket(run_id=run_id)
        if not run_log_bucket.exists():
            return []

        # Objects are stored without a .json suffix (see _store), so the
        # object name is already the branch log name.
        branch_files = run_log_bucket.glob("BranchLog-*")
        return [f.name for f in branch_files]

    def _store(self, run_id: str, contents: dict, name: str, insert=False):
        """
        Store the contents against the name in the bucket.

        Args:
            run_id (str): The run id
            contents (dict): The dict to store
            name (str): The name to store as (without path)
            insert (bool): Unused, kept for interface compatibility
        """
        run_log_bucket = self.get_run_log_bucket(run_id=run_id)
        run_log_bucket.mkdir(parents=True, exist_ok=True)

        # Joining a cloud path keeps the parent's client, so there is no
        # need to rebuild an S3Path with an explicit client here.
        obj = run_log_bucket / name
        obj.write_text(json.dumps(contents, ensure_ascii=True, indent=4))

    def _retrieve(self, run_id: str, name: str) -> dict:
        """
        Retrieve the contents stored against the name in the bucket.

        Args:
            run_id (str): The run id
            name (str): the name of the object to retrieve (without path)

        Returns:
            dict: The contents
        """
        run_log_bucket = self.get_run_log_bucket(run_id=run_id)
        obj = run_log_bucket / name

        return json.loads(obj.read_text())
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Dict
|
|
5
|
+
|
|
6
|
+
from extensions.run_log_store.any_path import AnyPathRunLogStore
|
|
7
|
+
from runnable import defaults, utils
|
|
8
|
+
from runnable.datastore import RunLog
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class FileSystemRunLogstore(AnyPathRunLogStore):
    """
    Run log store backed by the local file system.

    Each run is persisted as one JSON file named ``<run_id>.json`` inside
    ``log_folder``, which keeps runs easy to inspect and compare across
    other store types.

    When to use:
        When locally testing a pipeline and have the need to compare across runs.
        Its fully featured and perfectly fine if your local environment is where you would do everything.

    Do not use:
        If you need parallelization on local, this run log would not support it.

    Example config:

    run_log:
      type: file-system
      config:
        log_folder: The folder to out the logs. Defaults to .run_log_store

    """

    service_name: str = "file-system"
    log_folder: str = defaults.LOG_LOCATION_FOLDER

    @property
    def log_folder_name(self):
        """The configured folder in which run logs are written."""
        return self.log_folder

    def get_summary(self) -> Dict[str, Any]:
        """Summarize the store type and its on-disk location."""
        return {"Type": self.service_name, "Location": self.log_folder}

    def write_to_path(self, run_log: RunLog):
        """
        Serialize the run log as JSON into the log folder.

        Args:
            run_log (RunLog): The run log to be added to the database
        """
        folder = self.log_folder_name
        utils.safe_make_dir(folder)

        target = Path(folder) / f"{run_log.run_id}.json"
        with target.open("w") as fw:
            json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4)  # pylint: disable=no-member

    def read_from_path(self, run_id: str) -> RunLog:
        """
        Look into the run log folder for the run log for the run id.

        If the run log does not exist, raise an exception. If it does, decode it
        as a RunLog and return it.

        Args:
            run_id (str): The requested run id to retrieve the run log store

        Raises:
            FileNotFoundError: If the Run Log has not been found.

        Returns:
            RunLog: The decoded Run log
        """
        source = Path(self.log_folder_name) / f"{run_id}.json"

        if not source.exists():
            raise FileNotFoundError(f"Expected {source} is not present")

        with source.open("r") as fr:
            payload = json.load(fr)
        return RunLog(**payload)  # pylint: disable=no-member
|