runnable-0.50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
extensions/README.md ADDED
File without changes
extensions/__init__.py ADDED
File without changes
extensions/catalog/README.md ADDED
File without changes
extensions/catalog/any_path.py ADDED
@@ -0,0 +1,214 @@
+ import logging
+ import os
+ import shutil
+ from abc import abstractmethod
+ from pathlib import Path
+ from typing import Any, Dict, List
+
+ from cloudpathlib import CloudPath
+
+ from runnable import defaults, utils
+ from runnable.catalog import BaseCatalog
+ from runnable.datastore import DataCatalog
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class AnyPathCatalog(BaseCatalog):
+     """
+     A base catalog handler that stores artifacts in a per-run location on a local or cloud file system.
+
+     Note: Do not use the file-system variant if the steps of the pipeline run on different compute environments.
+
+     Example config:
+
+     catalog:
+       type: file-system
+       config:
+         catalog_location: The location to store the catalog.
+         compute_data_folder: The folder to source the data from.
+     """
+
+     @abstractmethod
+     def get_summary(self) -> Dict[str, Any]: ...
+
+     @abstractmethod
+     def upload_to_catalog(self, file: Path) -> None: ...
+
+     @abstractmethod
+     def download_from_catalog(self, file: Path | CloudPath) -> None: ...
+
+     @abstractmethod
+     def get_catalog_location(self) -> Path | CloudPath:
+         """
+         For local file systems, this is .catalog/run_id/compute_data_folder.
+         For cloud systems, this is s3://bucket/run_id/compute_data_folder.
+         """
+         ...
+
+     def get(self, name: str) -> List[DataCatalog]:
+         """
+         Get the files whose names match the glob pattern from the catalog.
+
+         Args:
+             name (str): A glob pattern matching the file name
+
+         Raises:
+             Exception: If no files matching the pattern are found in the catalog
+
+         Returns:
+             List[DataCatalog]: A list of data catalog objects
+         """
+         run_catalog = self.get_catalog_location()
+
+         # Iterate through the contents of the run_catalog and copy the files that fit the name pattern
+         # We should also return a list of data hashes
+         glob_files = run_catalog.glob(name)
+         logger.debug(
+             f"Glob identified {glob_files} as matches from the catalog location: {run_catalog}"
+         )
+
+         data_catalogs = []
+         run_log_store = self._context.run_log_store
+         for file in glob_files:
+             if file.is_dir():
+                 # Need not add a data catalog for the folder
+                 continue
+
+             if str(file).endswith(".execution.log"):
+                 continue
+
+             self.download_from_catalog(file)
+             relative_file_path = file.relative_to(run_catalog)  # type: ignore
+
+             data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
+             data_catalog.catalog_relative_path = str(relative_file_path)
+             data_catalog.data_hash = utils.get_data_hash(str(relative_file_path))
+             data_catalog.stage = "get"
+             data_catalogs.append(data_catalog)
+
+         if not data_catalogs:
+             raise Exception(f"Did not find any files matching {name} in {run_catalog}")
+
+         return data_catalogs
+
+     def put(
+         self,
+         name: str,
+         allow_file_not_found_exc: bool = False,
+         store_copy: bool = True,
+     ) -> List[DataCatalog]:
+         """
+         Put the files matching the glob pattern into the catalog.
+
+         Args:
+             name (str): The glob pattern of the files to catalog
+             allow_file_not_found_exc (bool, optional): If True, do not raise when no files match. Defaults to False.
+             store_copy (bool, optional): If True, copy the matched files into the catalog location. Defaults to True.
+
+         Raises:
+             Exception: If the compute data folder does not exist, or if no files match and
+                 allow_file_not_found_exc is False.
+
+         Returns:
+             List[DataCatalog]: A list of data catalog objects
+         """
+         run_id = self._context.run_id
+         logger.info(
+             f"Using the {self.service_name} catalog and trying to put {name} for run_id: {run_id}"
+         )
+
+         copy_from = Path(self.compute_data_folder)
+
+         if not copy_from.is_dir():
+             msg = (
+                 f"Expected compute data folder to be present at: {copy_from} but not found.\n"
+                 "Note: runnable does not create the compute data folder for you. Please ensure that the "
+                 "folder exists.\n"
+             )
+             raise Exception(msg)
+
+         # Iterate through the contents of copy_from and if the name matches, we move them to the run_catalog
+         # We should also return a list of datastore.DataCatalog items
+         glob_files = copy_from.glob(name)
+         logger.debug(
+             f"Glob identified {glob_files} as matches from the compute data folder: {copy_from}"
+         )
+
+         data_catalogs = []
+         run_log_store = self._context.run_log_store
+         for file in glob_files:
+             if file.is_dir():
+                 # Need not add a data catalog for the folder
+                 continue
+
+             relative_file_path = file.relative_to(copy_from)
+
+             data_catalog = run_log_store.create_data_catalog(str(relative_file_path))
+             data_catalog.catalog_relative_path = (
+                 run_id + os.sep + str(relative_file_path)
+             )
+             data_catalog.data_hash = utils.get_data_hash(str(file))
+             data_catalog.stage = "put"
+             data_catalogs.append(data_catalog)
+
+             # TODO: Think about syncing only if the file is changed
+             if store_copy:
+                 logger.debug(
+                     f"Copying file {file} to the catalog location for run_id: {run_id}"
+                 )
+                 self.upload_to_catalog(file)
+             else:
+                 logger.debug(
+                     f"Not copying file {file} to the catalog location for run_id: {run_id}"
+                 )
+
+         if not data_catalogs and not allow_file_not_found_exc:
+             raise Exception(f"Did not find any files matching {name} in {copy_from}")
+
+         return data_catalogs
+
+     def sync_between_runs(self, previous_run_id: str, run_id: str):
+         """
+         Given the previous run id, sync the catalogs between the current run and the previous one.
+
+         Args:
+             previous_run_id (str): The previous run id to sync the catalogs from
+             run_id (str): The run id to which the data catalogs should be synced
+
+         Raises:
+             Exception: If the catalogs of the previous run are not found in the catalog location
+         """
+         logger.info(
+             f"Using the {self.service_name} catalog and syncing catalogs "
+             f"between old: {previous_run_id} to new: {run_id}"
+         )
+
+         catalog_location = Path(self.get_catalog_location())
+         run_catalog = catalog_location / run_id
+         utils.safe_make_dir(run_catalog)
+
+         if not utils.does_dir_exist(catalog_location / previous_run_id):
+             msg = (
+                 f"Catalogs from previous run : {previous_run_id} are not found.\n"
+                 "Note: Please provision the catalog objects generated by the previous run in the same catalog"
+                 " location as the current run, even if the catalog handler for the previous run was different"
+             )
+             raise Exception(msg)
+
+         cataloged_files = list((catalog_location / previous_run_id).glob("*"))
+
+         for cataloged_file in cataloged_files:
+             if str(cataloged_file).endswith("execution.log"):
+                 continue
+
+             if cataloged_file.is_file():
+                 shutil.copy(cataloged_file, run_catalog / cataloged_file.name)
+             else:
+                 shutil.copytree(cataloged_file, run_catalog / cataloged_file.name)
+             logger.info(f"Copied file from: {cataloged_file} to {run_catalog}")
extensions/catalog/file_system.py ADDED
@@ -0,0 +1,52 @@
+ import logging
+ import shutil
+ from pathlib import Path
+ from typing import Any
+
+ from cloudpathlib import CloudPath
+ from pydantic import Field
+
+ from extensions.catalog.any_path import AnyPathCatalog
+ from runnable import defaults
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class FileSystemCatalog(AnyPathCatalog):
+     service_name: str = "file-system"
+
+     catalog_location: str = Field(default=defaults.CATALOG_LOCATION_FOLDER)
+
+     def get_summary(self) -> dict[str, Any]:
+         return {
+             "compute_data_folder": self.compute_data_folder,
+             "catalog_location": self.catalog_location,
+         }
+
+     def get_catalog_location(self) -> Path:
+         run_id = self._context.run_id
+         return Path(self.catalog_location) / run_id / self.compute_data_folder
+
+     def download_from_catalog(self, file: Path | CloudPath) -> None:
+         assert isinstance(file, Path)
+
+         run_catalog = self.get_catalog_location()
+         relative_file_path = file.relative_to(run_catalog)
+
+         copy_to = Path(self.compute_data_folder)
+         # Make the directory in the data folder if required
+         (copy_to / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
+         shutil.copy(file, copy_to / relative_file_path)
+
+     def upload_to_catalog(self, file: Path) -> None:
+         run_catalog = self.get_catalog_location()
+         run_catalog.mkdir(parents=True, exist_ok=True)
+
+         logger.debug(
+             f"Copying objects from {self.compute_data_folder} to the run catalog location of {run_catalog}"
+         )
+
+         relative_file_path = file.relative_to(self.compute_data_folder)
+
+         (run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
+         shutil.copy(file, run_catalog / relative_file_path)
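The upload/download path arithmetic in this handler is symmetric: files are re-based from `compute_data_folder` into `<catalog_location>/<run_id>/<compute_data_folder>` and back. A small standalone illustration of that re-basing with plain `pathlib` (all values are placeholders):

```python
from pathlib import Path

catalog_location = Path(".catalog")  # illustrative values, not package defaults
run_id = "run-001"
compute_data_folder = "data"

run_catalog = catalog_location / run_id / compute_data_folder

local_file = Path(compute_data_folder) / "models/weights.bin"
relative = local_file.relative_to(compute_data_folder)

uploaded = run_catalog / relative  # .catalog/run-001/data/models/weights.bin
downloaded = Path(compute_data_folder) / uploaded.relative_to(run_catalog)

# The round trip lands the file back where it started.
assert downloaded == local_file
```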
extensions/catalog/minio.py ADDED
@@ -0,0 +1,72 @@
+ import logging
+ from functools import lru_cache
+ from pathlib import Path
+ from typing import Any
+
+ from cloudpathlib import CloudPath, S3Client, S3Path
+ from pydantic import Field, SecretStr
+
+ from extensions.catalog.any_path import AnyPathCatalog
+ from runnable import defaults
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ @lru_cache
+ def get_minio_client(
+     endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
+ ) -> S3Client:
+     return S3Client(
+         endpoint_url=endpoint_url,
+         aws_access_key_id=aws_access_key_id,
+         aws_secret_access_key=aws_secret_access_key,
+     )
+
+
+ class MinioCatalog(AnyPathCatalog):
+     service_name: str = "minio"
+
+     endpoint_url: str = Field(default="http://localhost:9002")
+     aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
+     aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
+     bucket: str = "runnable"
+
+     def get_summary(self) -> dict[str, Any]:
+         return {
+             "service_name": self.service_name,
+             "compute_data_folder": self.compute_data_folder,
+             "endpoint_url": self.endpoint_url,
+             "bucket": self.bucket,
+         }
+
+     def get_catalog_location(self) -> S3Path:
+         run_id = self._context.run_id
+
+         return S3Path(
+             f"s3://{self.bucket}/{run_id}/{self.compute_data_folder}".strip("."),
+             client=get_minio_client(
+                 self.endpoint_url,
+                 self.aws_access_key_id.get_secret_value(),
+                 self.aws_secret_access_key.get_secret_value(),
+             ),
+         )
+
+     def download_from_catalog(self, file: Path | CloudPath) -> None:
+         assert isinstance(file, S3Path)
+
+         relative_file_path = file.relative_to(self.get_catalog_location())
+
+         file_to_download = Path(self.compute_data_folder) / relative_file_path
+         file_to_download.parent.mkdir(parents=True, exist_ok=True)
+
+         file.download_to(file_to_download)
+
+     def upload_to_catalog(self, file: Path) -> None:
+         run_catalog = self.get_catalog_location()
+
+         relative_file_path = file.relative_to(self.compute_data_folder)
+         (run_catalog / relative_file_path.parent).mkdir(parents=True, exist_ok=True)
+
+         # Join the path relative to the run catalog, not the local file path
+         file_in_cloud = run_catalog / str(relative_file_path)
+         assert isinstance(file_in_cloud, S3Path)
+         file_in_cloud.upload_from(file)
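Because `get_minio_client` is wrapped in `functools.lru_cache`, every `S3Path` built for the same endpoint/credential triple reuses a single `S3Client` (and its connection pool) rather than constructing one per call. A standalone illustration of that caching behaviour; the endpoint and credentials are placeholders:

```python
from functools import lru_cache

from cloudpathlib import S3Client


@lru_cache
def get_client(endpoint_url: str, key: str, secret: str) -> S3Client:
    # One client per unique argument triple; repeated calls return the cached instance.
    return S3Client(
        endpoint_url=endpoint_url,
        aws_access_key_id=key,
        aws_secret_access_key=secret,
    )


a = get_client("http://localhost:9002", "minioadmin", "minioadmin")
b = get_client("http://localhost:9002", "minioadmin", "minioadmin")
assert a is b  # cached: the very same client object
```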
extensions/catalog/pyproject.toml ADDED
@@ -0,0 +1,14 @@
+ [project]
+ name = "catalog"
+ version = "0.0.0"
+ description = "Add your description here"
+ readme = "README.md"
+ requires-python = ">=3.10"
+ dependencies = []
+
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["."]
extensions/catalog/s3.py ADDED
@@ -0,0 +1,11 @@
+ from cloudpathlib import S3Path
+
+ from extensions.catalog.any_path import AnyPathCatalog
+
+
+ class S3Catalog(AnyPathCatalog):
+     service_name: str = "s3"
+
+     def get_path(self, path: str) -> S3Path:
+         # TODO: Might need to assert the credentials are set
+         return S3Path(path)
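Unlike `MinioCatalog`, this handler builds `S3Path` objects without an explicit client, so cloudpathlib falls back to boto3's default credential chain, which is presumably what the TODO refers to. A quick illustration; the bucket and key are placeholders:

```python
from cloudpathlib import S3Path

# With no explicit S3Client, cloudpathlib uses boto3's default credential chain:
# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY, ~/.aws/credentials, or an IAM role.
path = S3Path("s3://my-placeholder-bucket/run-001/data/result.csv")
print(path.bucket, path.key)
```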
extensions/job_executor/README.md ADDED
File without changes
extensions/job_executor/__init__.py ADDED
@@ -0,0 +1,236 @@
+ import logging
+ import os
+ from typing import Dict, List, Optional
+
+ from runnable import (
+     console,
+     context,
+     defaults,
+     exceptions,
+     parameters,
+     task_console,
+     utils,
+ )
+ from runnable.datastore import DataCatalog, JobLog, JsonParameter, StepAttempt
+ from runnable.defaults import IterableParameterModel
+ from runnable.executor import BaseJobExecutor
+ from runnable.tasks import BaseTaskType
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class GenericJobExecutor(BaseJobExecutor):
+     """
+     The skeleton of an executor class.
+     Any implementation of an executor should inherit this class and override accordingly.
+
+     This is a loaded base class which has a lot of methods already implemented for "typical" executions.
+     Look at the function docs to understand how to use them appropriately.
+
+     For any implementation:
+     1). Who/when should the run log be set up?
+     2). Who/when should the step log be set up?
+
+     """
+
+     service_name: str = ""
+     service_type: str = "job_executor"
+     mock: bool = False
+
+     @property
+     def _context(self):
+         current_context = context.get_run_context()
+         if current_context is None:
+             raise RuntimeError("No run context available")
+         if not isinstance(current_context, context.JobContext):
+             raise TypeError(
+                 f"Expected JobContext, got {type(current_context).__name__}"
+             )
+         return current_context
+
+     def _get_parameters(self) -> Dict[str, JsonParameter]:
+         """
+         Consolidate the parameters from the environment variables
+         and the parameters file.
+
+         The parameters defined in the environment variables take precedence over the parameters file.
+
+         Returns:
+             Dict[str, JsonParameter]: The consolidated parameters.
+         """
+         params: Dict[str, JsonParameter] = {}
+         if self._context.parameters_file:
+             user_defined = utils.load_yaml(self._context.parameters_file) or {}
+
+             for key, value in user_defined.items():
+                 params[key] = JsonParameter(value=value, kind="json")
+
+         # Update these with some from the environment variables
+         params.update(parameters.get_user_set_parameters())
+         logger.debug(f"parameters as seen by executor: {params}")
+         return params
+
+     def _set_up_run_log(self, exists_ok=False):
+         """
+         Create a run log and put that in the run log store.
+
+         If exists_ok, we allow the run log to be already present in the run log store.
+         """
+         try:
+             attempt_run_log = self._context.run_log_store.get_run_log_by_id(
+                 run_id=self._context.run_id, full=False
+             )
+
+             logger.warning(
+                 f"The run log by id: {self._context.run_id} already exists, is this by design?"
+             )
+             raise exceptions.RunLogExistsError(
+                 f"The run log by id: {self._context.run_id} already exists and is {attempt_run_log.status}"
+             )
+         except exceptions.RunLogNotFoundError:
+             pass
+         except exceptions.RunLogExistsError:
+             if exists_ok:
+                 return
+             raise
+
+         # Consolidate and get the parameters
+         params = self._get_parameters()
+
+         self._context.run_log_store.create_run_log(
+             run_id=self._context.run_id,
+             tag=self._context.tag,
+             status=defaults.PROCESSING,
+         )
+         # Any interaction with run log store attributes should happen via API if available.
+         self._context.run_log_store.set_parameters(
+             run_id=self._context.run_id, parameters=params
+         )
+
+         # Update run_config
+         run_config = self._context.model_dump()
+         logger.debug(f"run_config as seen by executor: {run_config}")
+         self._context.run_log_store.set_run_config(
+             run_id=self._context.run_id, run_config=run_config
+         )
+
+     @property
+     def step_attempt_number(self) -> int:
+         """
+         The attempt number of the current step.
+         Orchestrators should use this to submit multiple attempts of the job.
+
+         Returns:
+             int: The attempt number of the current step. Defaults to 1.
+         """
+         return int(os.environ.get(defaults.ATTEMPT_NUMBER, 1))
+
+     def add_code_identities(self, job_log: JobLog, **kwargs):
+         """
+         Add code identities specific to the implementation.
+
+         The base class has an implementation of adding git code identities.
+
+         Args:
+             job_log (JobLog): The job log to add the code identities to
+         """
+         job_log.code_identities.append(utils.get_git_code_identity())
+
+     def send_return_code(self, stage="traversal"):
+         """
+         Convenience function used by the pipeline to send the return code to the caller of the CLI.
+
+         Raises:
+             Exception: If the pipeline execution failed
+         """
+         run_id = self._context.run_id
+
+         run_log = self._context.run_log_store.get_run_log_by_id(
+             run_id=run_id, full=False
+         )
+         if run_log.status == defaults.FAIL:
+             raise exceptions.ExecutionFailedError(run_id=run_id)
+
+     def _sync_catalog(
+         self,
+         catalog_settings: Optional[List[str]] = None,
+         allow_file_not_found_exc: bool = False,
+     ) -> List[DataCatalog] | None:
+         if not catalog_settings:
+             logger.info("No catalog settings found")
+             return None
+
+         data_catalogs = []
+         for name_pattern in catalog_settings:
+             data_catalog = self._context.catalog.put(
+                 name=name_pattern,
+                 allow_file_not_found_exc=allow_file_not_found_exc,
+                 store_copy=self._context.catalog_store_copy,
+             )
+
+             logger.debug(f"Added data catalog: {data_catalog} to job log")
+             data_catalogs.extend(data_catalog)
+
+         return data_catalogs
+
+     def add_task_log_to_catalog(
+         self, name: str, iter_variable: Optional[IterableParameterModel] = None
+     ):
+         log_file_name = utils.make_log_file_name(
+             name=name,
+             iter_variable=iter_variable,
+         )
+         task_console.save_text(log_file_name)
+         # Put the log file in the catalog
+         self._context.catalog.put(name=log_file_name)
+         os.remove(log_file_name)
+
+     def execute_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]] = None):
+         """
+         Focuses on the execution of the job.
+         """
+         logger.info("Trying to execute job")
+
+         job_log = self._context.run_log_store.get_job_log(run_id=self._context.run_id)
+         self.add_code_identities(job_log)
+
+         if not self.mock:
+             attempt_log = job.execute_command()
+             attempt_log.retry_indicator = self._context.retry_indicator
+         else:
+             attempt_log = StepAttempt(
+                 status=defaults.SUCCESS,
+                 retry_indicator=self._context.retry_indicator,
+             )
+
+         job_log.status = attempt_log.status
+         job_log.attempts.append(attempt_log)
+
+         allow_file_not_found_exc = True
+         if job_log.status == defaults.SUCCESS:
+             allow_file_not_found_exc = False
+
+         data_catalogs_put: Optional[List[DataCatalog]] = self._sync_catalog(
+             catalog_settings=catalog_settings,
+             allow_file_not_found_exc=allow_file_not_found_exc,
+         )
+
+         logger.debug(f"data_catalogs_put: {data_catalogs_put}")
+         job_log.add_data_catalogs(data_catalogs_put or [])
+
+         console.print("Summary of job")
+         console.print(job_log.get_summary())
+
+         self._context.run_log_store.add_job_log(
+             run_id=self._context.run_id, job_log=job_log
+         )
+         self.add_task_log_to_catalog("job")
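`GenericJobExecutor` deliberately leaves the two questions in its docstring (who sets up the run log, who submits the job) to subclasses. A minimal sketch of a concrete subclass wiring those together in-process, analogous in shape to the emulator executor that follows; the class name is illustrative and not part of the package:

```python
from typing import List, Optional

from extensions.job_executor import GenericJobExecutor
from runnable.tasks import BaseTaskType


class InlineJobExecutor(GenericJobExecutor):
    """Runs the job in the current process; illustrative only."""

    service_name: str = "inline-demo"

    def submit_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]] = None):
        # The submitter owns run-log and job-log creation ...
        self._set_up_run_log()
        job_log = self._context.run_log_store.create_job_log()
        self._context.run_log_store.add_job_log(
            run_id=self._context.run_id, job_log=job_log
        )
        # ... and then hands off to the shared execution path in the base class.
        self.execute_job(job, catalog_settings=catalog_settings)
```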
extensions/job_executor/emulate.py ADDED
@@ -0,0 +1,70 @@
+ import logging
+ import shlex
+ import subprocess
+ import sys
+ from typing import List, Optional
+
+ from extensions.job_executor import GenericJobExecutor
+ from runnable import context, defaults
+ from runnable.tasks import BaseTaskType
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+ class EmulatorJobExecutor(GenericJobExecutor):
+     """
+     A job executor that emulates remote job execution by running the job
+     as a subprocess in the current virtual environment.
+     """
+
+     service_name: str = "emulator"
+
+     def submit_job(self, job: BaseTaskType, catalog_settings: Optional[List[str]] = None):
+         """
+         This method gets invoked by the CLI.
+         """
+         self._set_up_run_log()
+
+         # Create the job log, then run the job as a subprocess
+         job_log = self._context.run_log_store.create_job_log()
+         self._context.run_log_store.add_job_log(
+             run_id=self._context.run_id, job_log=job_log
+         )
+         self.run_click_command()
+
+     def run_click_command(self) -> str:
+         """
+         Execute the job's Click-based CLI command in the current virtual environment,
+         streaming its output and returning the full transcript.
+         """
+         assert isinstance(self._context, context.JobContext)
+         command = self._context.get_job_callable_command()
+
+         sub_command = [sys.executable, "-m", "runnable.cli"] + shlex.split(command)[1:]
+
+         process = subprocess.Popen(
+             sub_command,
+             stdout=subprocess.PIPE,
+             stderr=subprocess.STDOUT,
+             universal_newlines=True,
+             bufsize=1,
+         )
+
+         output = []
+         try:
+             while True:
+                 line = process.stdout.readline()  # type: ignore
+                 if not line and process.poll() is not None:
+                     break
+                 print(line, end="")
+                 output.append(line)
+         finally:
+             process.stdout.close()  # type: ignore
+
+         if process.returncode != 0:
+             raise subprocess.CalledProcessError(
+                 process.returncode, command, "".join(output)
+             )
+
+         return "".join(output)