runnable-0.50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
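Because a wheel is just a zip archive, the file list above can be reproduced locally from the standard library. The snippet below is a minimal sketch and assumes the wheel file runnable-0.50.0-py3-none-any.whl has already been downloaded (for example with `pip download runnable==0.50.0 --no-deps`).

import zipfile

# A wheel is a zip archive; listing its members reproduces the table above.
with zipfile.ZipFile("runnable-0.50.0-py3-none-any.whl") as whl:
    for info in whl.infolist():
        print(f"{info.filename}  ({info.file_size} bytes)")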
extensions/run_log_store/chunked_fs.py
@@ -0,0 +1,122 @@
+import json
+import logging
+from pathlib import Path
+from typing import Any, Dict, Union
+
+from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+from runnable import defaults, utils
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
+    """
+    File system run log store but chunks the run log into thread safe chunks.
+    This enables executions to be parallel.
+    """
+
+    service_name: str = "chunked-fs"
+    log_folder: str = defaults.LOG_LOCATION_FOLDER
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {"Type": self.service_name, "Location": self.log_folder}
+
+        return summary
+
+    def _exists(self, run_id: str, name: str) -> bool:
+        """
+        Check if a file exists in the log folder.
+
+        Args:
+            run_id (str): The run id
+            name (str): The exact file name to check
+
+        Returns:
+            bool: True if file exists, False otherwise
+        """
+        log_folder = self.log_folder_with_run_id(run_id=run_id)
+        file_path = log_folder / self.safe_suffix_json(name)
+        return file_path.exists()
+
+    def _list_branch_logs(self, run_id: str) -> list[str]:
+        """
+        List all branch log file names for a run_id.
+
+        Args:
+            run_id (str): The run id
+
+        Returns:
+            list[str]: List of branch log file names without .json extension
+        """
+        log_folder = self.log_folder_with_run_id(run_id=run_id)
+        if not log_folder.exists():
+            return []
+
+        # Find all files starting with "BranchLog-"
+        branch_files = list(log_folder.glob("BranchLog-*.json"))
+        # Return file names without path and without .json extension
+        return [f.stem for f in branch_files]
+
+    def log_folder_with_run_id(self, run_id: str) -> Path:
+        """
+        Utility function to get the log folder for a run id.
+
+        Args:
+            run_id (str): The run id
+
+        Returns:
+            Path: The path to the log folder with the run id
+        """
+        return Path(self.log_folder) / run_id
+
+    def safe_suffix_json(self, name: Union[Path, str]) -> str:
+        """
+        Safely attach a suffix to a json file.
+
+        Args:
+            name (Path): The name of the file with or without suffix of json
+
+        Returns:
+            str: The name of the file with .json
+        """
+        if str(name).endswith("json"):
+            return str(name)
+
+        return str(name) + ".json"
+
+    def _store(self, run_id: str, contents: dict, name: str, insert=False):
+        """
+        Store the contents against the name in the folder.
+
+        Args:
+            run_id (str): The run id
+            contents (dict): The dict to store
+            name (str): The name to store as (without path)
+            insert (bool): Whether this is a new insert (unused, kept for compatibility)
+        """
+        log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
+        file_path = log_folder_with_run_id / name
+
+        utils.safe_make_dir(log_folder_with_run_id)
+
+        with open(self.safe_suffix_json(file_path), "w") as fw:
+            json.dump(contents, fw, ensure_ascii=True, indent=4)
+
+    def _retrieve(self, run_id: str, name: str) -> dict:
+        """
+        Does the job of retrieving from the folder.
+
+        Args:
+            run_id (str): The run id
+            name (str): the name of the file to retrieve (without path)
+
+        Returns:
+            dict: The contents
+        """
+        log_folder_with_run_id = self.log_folder_with_run_id(run_id=run_id)
+        file_path = log_folder_with_run_id / name
+
+        with open(self.safe_suffix_json(file_path), "r") as fr:
+            contents = json.load(fr)
+
+        return contents
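Put together, each run gets its own folder under the configured log_folder, and every chunk of the run log becomes a separate JSON file inside it, which is what keeps concurrent branches safe to write. The sketch below (plain stdlib, not part of the package) re-states the path logic to show the resulting layout; the chunk names and run id are illustrative, since the actual names are chosen by the ChunkedRunLogStore base class in generic_chunked.py.

from pathlib import Path

# Standalone illustration of the on-disk layout produced by ChunkedFileSystemRunLogStore.
log_folder = Path(".run_log_store")   # the documented default for log_folder
run_id = "exciting-turing-1907"       # hypothetical run id


def safe_suffix_json(name: str) -> str:
    # Mirrors ChunkedFileSystemRunLogStore.safe_suffix_json
    return name if name.endswith("json") else name + ".json"


for chunk in ["RunLog", "BranchLog-map.1", "BranchLog-map.2"]:
    print(log_folder / run_id / safe_suffix_json(chunk))
# .run_log_store/exciting-turing-1907/RunLog.json
# .run_log_store/exciting-turing-1907/BranchLog-map.1.json
# .run_log_store/exciting-turing-1907/BranchLog-map.2.json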
extensions/run_log_store/chunked_minio.py
@@ -0,0 +1,141 @@
+import json
+import logging
+from functools import lru_cache
+from typing import Any, Dict
+
+from cloudpathlib import S3Client, S3Path
+from pydantic import Field, SecretStr
+
+from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
+from runnable import defaults
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+@lru_cache
+def get_minio_client(
+    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+) -> S3Client:
+    return S3Client(
+        endpoint_url=endpoint_url,
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+
+
+class ChunkedMinioRunLogStore(ChunkedRunLogStore):
+    """
+    File system run log store but chunks the run log into thread safe chunks.
+    This enables executions to be parallel.
+    """
+
+    service_name: str = "chunked-minio"
+    endpoint_url: str = Field(default="http://localhost:9002")
+    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
+    bucket: str = Field(default="runnable/run-logs")
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Type": self.service_name,
+            "Location": f"{self.endpoint_url}/{self.bucket}",
+        }
+
+        return summary
+
+    def get_run_log_bucket(self) -> S3Path:
+        run_id = self._context.run_id
+
+        return S3Path(
+            f"s3://{self.bucket}/{run_id}/",
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+    def _exists(self, run_id: str, name: str) -> bool:
+        """
+        Check if a file exists in the Minio bucket.
+
+        Args:
+            run_id (str): The run id
+            name (str): The exact file name to check
+
+        Returns:
+            bool: True if file exists, False otherwise
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        file_path = run_log_bucket / name
+        return file_path.exists()
+
+    def _list_branch_logs(self, run_id: str) -> list[str]:
+        """
+        List all branch log file names for a run_id.
+
+        Args:
+            run_id (str): The run id
+
+        Returns:
+            list[str]: List of branch log file names (e.g., ["BranchLog-map.1", "BranchLog-map.2"])
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        if not run_log_bucket.exists():
+            return []
+
+        # Find all files starting with "BranchLog-"
+        branch_files = list(run_log_bucket.glob("BranchLog-*"))
+        # Return file names without path (just the name)
+        return [f.name for f in branch_files]
+
+    def _store(self, run_id: str, contents: dict, name: str, insert=False):
+        """
+        Store the contents against the name in the folder.
+
+        Args:
+            run_id (str): The run id
+            contents (dict): The dict to store
+            name (str): The name to store as (without path)
+            insert (bool): Whether this is a new insert (unused, kept for compatibility)
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        run_log_bucket.mkdir(parents=True, exist_ok=True)
+
+        file_path = str(run_log_bucket / name)
+        obj = S3Path(
+            file_path,
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+        obj.write_text(json.dumps(contents, ensure_ascii=True, indent=4))
+
+    def _retrieve(self, run_id: str, name: str) -> dict:
+        """
+        Does the job of retrieving from the folder.
+
+        Args:
+            run_id (str): The run id
+            name (str): the name of the file to retrieve (without path)
+
+        Returns:
+            dict: The contents
+        """
+        run_log_bucket = self.get_run_log_bucket()
+        file_path = str(run_log_bucket / name)
+
+        obj = S3Path(
+            file_path,
+            client=get_minio_client(
+                self.endpoint_url,
+                self.aws_access_key_id.get_secret_value(),
+                self.aws_secret_access_key.get_secret_value(),
+            ),
+        )
+
+        run_log_text = json.loads(obj.read_text())
+        return run_log_text
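The MinIO variant follows the same chunking scheme but writes each chunk as an object under s3://<bucket>/<run_id>/ via cloudpathlib. Wrapping get_minio_client in lru_cache means every _store and _retrieve call with the same endpoint and credentials reuses a single S3Client instead of building a new one per chunk. The sketch below demonstrates that reuse in isolation with a stand-in client class; it is illustrative only and does not contact MinIO.

from functools import lru_cache


class FakeS3Client:
    """Stand-in for cloudpathlib.S3Client, used only to show the caching behaviour."""

    def __init__(self, endpoint_url: str, key: str, secret: str) -> None:
        self.endpoint_url = endpoint_url


@lru_cache
def get_client(endpoint_url: str, key: str, secret: str) -> FakeS3Client:
    # Same pattern as get_minio_client above: identical arguments -> same cached instance.
    return FakeS3Client(endpoint_url, key, secret)


a = get_client("http://localhost:9002", "minioadmin", "minioadmin")
b = get_client("http://localhost:9002", "minioadmin", "minioadmin")
assert a is b  # one client is shared across all chunk reads and writes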
extensions/run_log_store/file_system.py
@@ -0,0 +1,91 @@
+import json
+import logging
+from pathlib import Path
+from typing import Any, Dict
+
+from extensions.run_log_store.any_path import AnyPathRunLogStore
+from runnable import defaults, utils
+from runnable.datastore import RunLog
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+class FileSystemRunLogstore(AnyPathRunLogStore):
+    """
+    In this type of Run Log store, we use a file system to store the JSON run log.
+
+    Every single run is stored as a different file which makes it compatible across other store types.
+
+    When to use:
+        When locally testing a pipeline and have the need to compare across runs.
+        Its fully featured and perfectly fine if your local environment is where you would do everything.
+
+    Do not use:
+        If you need parallelization on local, this run log would not support it.
+
+    Example config:
+
+    run_log:
+      type: file-system
+      config:
+        log_folder: The folder to out the logs. Defaults to .run_log_store
+
+    """
+
+    service_name: str = "file-system"
+    log_folder: str = defaults.LOG_LOCATION_FOLDER
+
+    @property
+    def log_folder_name(self):
+        return self.log_folder
+
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {"Type": self.service_name, "Location": self.log_folder}
+
+        return summary
+
+    def write_to_path(self, run_log: RunLog):
+        """
+        Write the run log to the folder
+
+        Args:
+            run_log (RunLog): The run log to be added to the database
+        """
+        write_to = self.log_folder_name
+        utils.safe_make_dir(write_to)
+
+        write_to_path = Path(write_to)
+        run_id = run_log.run_id
+        json_file_path = write_to_path / f"{run_id}.json"
+
+        with json_file_path.open("w") as fw:
+            json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4)  # pylint: disable=no-member
+
+    def read_from_path(self, run_id: str) -> RunLog:
+        """
+        Look into the run log folder for the run log for the run id.
+
+        If the run log does not exist, raise an exception. If it does, decode it
+        as a RunLog and return it
+
+        Args:
+            run_id (str): The requested run id to retrieve the run log store
+
+        Raises:
+            FileNotFoundError: If the Run Log has not been found.
+
+        Returns:
+            RunLog: The decoded Run log
+        """
+        write_to = self.log_folder_name
+
+        read_from_path = Path(write_to)
+        json_file_path = read_from_path / f"{run_id}.json"
+
+        if not json_file_path.exists():
+            raise FileNotFoundError(f"Expected {json_file_path} is not present")
+
+        with json_file_path.open("r") as fr:
+            json_str = json.load(fr)
+            run_log = RunLog(**json_str)  # pylint: disable=no-member
+        return run_log
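Unlike the chunked stores, FileSystemRunLogstore keeps the whole run log in a single <run_id>.json file: write_to_path serialises the pydantic RunLog with model_dump(), and read_from_path rebuilds it by unpacking the parsed JSON back into the model. The snippet below shows that round trip with a minimal stand-in model; the real RunLog in runnable/datastore.py has many more fields, so this is a sketch of the pattern, not of the actual schema.

import json
from pathlib import Path

from pydantic import BaseModel


class TinyRunLog(BaseModel):
    """Hypothetical stand-in for runnable.datastore.RunLog."""

    run_id: str
    status: str = "SUCCESS"


log_folder = Path(".run_log_store")
log_folder.mkdir(parents=True, exist_ok=True)

run_log = TinyRunLog(run_id="demo-run")
path = log_folder / f"{run_log.run_id}.json"

# write_to_path: model -> dict -> JSON file
path.write_text(json.dumps(run_log.model_dump(), ensure_ascii=True, indent=4))

# read_from_path: JSON file -> dict -> model
restored = TinyRunLog(**json.loads(path.read_text()))
assert restored == run_log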