runnable 0.17.1-py3-none-any.whl → 0.19.0-py3-none-any.whl
- extensions/README.md +0 -0
- extensions/__init__.py +0 -0
- extensions/catalog/README.md +0 -0
- extensions/catalog/file_system.py +253 -0
- extensions/catalog/pyproject.toml +14 -0
- extensions/job_executor/README.md +0 -0
- extensions/job_executor/__init__.py +160 -0
- extensions/job_executor/k8s.py +484 -0
- extensions/job_executor/k8s_job_spec.yaml +37 -0
- extensions/job_executor/local.py +61 -0
- extensions/job_executor/local_container.py +192 -0
- extensions/job_executor/pyproject.toml +16 -0
- extensions/nodes/README.md +0 -0
- extensions/nodes/nodes.py +954 -0
- extensions/nodes/pyproject.toml +15 -0
- extensions/pipeline_executor/README.md +0 -0
- extensions/pipeline_executor/__init__.py +644 -0
- extensions/pipeline_executor/argo.py +1307 -0
- extensions/pipeline_executor/argo_specification.yaml +51 -0
- extensions/pipeline_executor/local.py +62 -0
- extensions/pipeline_executor/local_container.py +362 -0
- extensions/pipeline_executor/mocked.py +161 -0
- extensions/pipeline_executor/pyproject.toml +16 -0
- extensions/pipeline_executor/retry.py +180 -0
- extensions/run_log_store/README.md +0 -0
- extensions/run_log_store/__init__.py +0 -0
- extensions/run_log_store/chunked_fs.py +113 -0
- extensions/run_log_store/db/implementation_FF.py +163 -0
- extensions/run_log_store/db/integration_FF.py +0 -0
- extensions/run_log_store/file_system.py +145 -0
- extensions/run_log_store/generic_chunked.py +599 -0
- extensions/run_log_store/pyproject.toml +15 -0
- extensions/secrets/README.md +0 -0
- extensions/secrets/dotenv.py +62 -0
- extensions/secrets/pyproject.toml +15 -0
- runnable/__init__.py +1 -0
- runnable/catalog.py +1 -2
- runnable/entrypoints.py +1 -5
- runnable/executor.py +1 -1
- runnable/parameters.py +0 -9
- runnable/utils.py +5 -25
- {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/METADATA +1 -7
- runnable-0.19.0.dist-info/RECORD +58 -0
- {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/entry_points.txt +1 -0
- runnable-0.17.1.dist-info/RECORD +0 -23
- {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/WHEEL +0 -0
- {runnable-0.17.1.dist-info → runnable-0.19.0.dist-info}/licenses/LICENSE +0 -0
extensions/pipeline_executor/retry.py (new file)
@@ -0,0 +1,180 @@
import logging
from functools import cached_property
from typing import Any, Dict, Optional

from extensions.pipeline_executor import GenericPipelineExecutor
from runnable import context, defaults, exceptions
from runnable.datastore import RunLog
from runnable.defaults import TypeMapVariable
from runnable.nodes import BaseNode

logger = logging.getLogger(defaults.LOGGER_NAME)


class RetryExecutor(GenericPipelineExecutor):
    """
    The skeleton of an executor class.
    Any implementation of an executor should inherit this class and override accordingly.

    This is a loaded base class which has a lot of methods already implemented for "typical" executions.
    Look at the function docs to understand how to use them appropriately.

    For any implementation:
    1). Who/when should the run log be set up?
    2). Who/when should the step log be set up?

    """

    service_name: str = "retry"
    service_type: str = "executor"
    run_id: str

    _is_local: bool = True
    _original_run_log: Optional[RunLog] = None
    _restart_initiated: bool = False

    @property
    def _context(self):
        return context.run_context

    @cached_property
    def original_run_log(self):
        return self._context.run_log_store.get_run_log_by_id(
            run_id=self.run_id,
            full=True,
        )

    def _set_up_for_re_run(self, params: Dict[str, Any]) -> None:
        # Sync the previous run log catalog to this one.
        self._context.catalog_handler.sync_between_runs(
            previous_run_id=self.run_id, run_id=self._context.run_id
        )

        params.update(self.original_run_log.parameters)

    def _set_up_run_log(self, exists_ok=False):
        """
        Create a run log and put that in the run log store.

        If exists_ok, we allow the run log to be already present in the run log store.
        """
        super()._set_up_run_log(exists_ok=exists_ok)

        # Should the parameters be copied from the previous execution?
        # self._set_up_for_re_run(params=params)

    def execute_from_graph(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        """
        This is the entry point from the graph execution.

        While self.execute_graph is responsible for traversing the graph, this function is responsible for
        the actual execution of the node.

        If the node type is:
            * task: We can delegate to _execute_node after checking the eligibility for re-run in cases of a re-run
            * success: We can delegate to _execute_node
            * fail: We can delegate to _execute_node

        For nodes that are internally graphs:
            * parallel: Delegate the responsibility of execution to node.execute_as_graph()
            * dag: Delegate the responsibility of execution to node.execute_as_graph()
            * map: Delegate the responsibility of execution to node.execute_as_graph()

        Transpilers will NEVER use this method and will NEVER call this method.
        This method should only be used by interactive executors.

        Args:
            node (Node): The node to execute
            map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
                Defaults to None.
        """
        step_log = self._context.run_log_store.create_step_log(
            node.name, node._get_step_log_name(map_variable)
        )

        self.add_code_identities(node=node, step_log=step_log)

        step_log.step_type = node.node_type
        step_log.status = defaults.PROCESSING

        # Add the step log to the database as per the situation.
        # If it is a terminal node, complete it now.
        if node.node_type in ["success", "fail"]:
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            self._execute_node(node, map_variable=map_variable, **kwargs)
            return

        # In retry step
        if not self._is_step_eligible_for_rerun(node, map_variable=map_variable):
            # If the node name does not match, we move on to the next node.
            # If the previous run was successful, move on to the next step.
            step_log.mock = True
            step_log.status = defaults.SUCCESS
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            return

        # We call an internal function to iterate the sub graphs and execute them.
        if node.is_composite:
            self._context.run_log_store.add_step_log(step_log, self._context.run_id)
            node.execute_as_graph(map_variable=map_variable, **kwargs)
            return

        # Executor specific way to trigger a job.
        self._context.run_log_store.add_step_log(step_log, self._context.run_id)
        self.execute_node(node=node, map_variable=map_variable, **kwargs)

    def _is_step_eligible_for_rerun(
        self, node: BaseNode, map_variable: TypeMapVariable = None
    ):
        """
        In case of a re-run, this method checks the previous run's step status to determine whether a re-run is
        necessary.
            * True: If it is not a re-run.
            * True: If it is a re-run and we failed in the last run or the corresponding logs do not exist.
            * False: If it is a re-run and we succeeded in the last run.

        In most cases, this logic need not be touched.

        Args:
            node (Node): The node to check against re-run
            map_variable (dict, optional): If the node is of a map state, this corresponds to the value of the iterable.
                Defaults to None.

        Returns:
            bool: Eligibility for re-run. True means re-run, False means skip to the next step.
        """

        node_step_log_name = node._get_step_log_name(map_variable=map_variable)
        logger.info(
            f"Scanning previous run logs for node logs of: {node_step_log_name}"
        )

        if self._restart_initiated:
            return True

        try:
            previous_attempt_log, _ = (
                self.original_run_log.search_step_by_internal_name(node_step_log_name)
            )
        except exceptions.StepLogNotFoundError:
            logger.warning(f"Did not find the node {node.name} in previous run log")
            self._restart_initiated = True
            return True  # We should re-run the node.

        logger.info(f"The original step status: {previous_attempt_log.status}")

        if previous_attempt_log.status == defaults.SUCCESS:
            return False  # We need not run the node.

        logger.info(
            f"The new execution should start executing graph from this node {node.name}"
        )
        self._restart_initiated = True
        return True

    def execute_node(
        self, node: BaseNode, map_variable: TypeMapVariable = None, **kwargs
    ):
        self._execute_node(node, map_variable=map_variable, **kwargs)
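The heart of `RetryExecutor` is the skip-or-re-run decision above. For orientation, here is a small illustrative sketch, not shipped in the wheel, that mirrors the decision table of `_is_step_eligible_for_rerun` with a plain dict standing in for the previous run log; the step names and statuses below are invented.

```python
# Illustrative sketch (not part of the package): mirrors the skip/re-run rule of
# RetryExecutor._is_step_eligible_for_rerun. Step names and statuses are made up.
SUCCESS = "SUCCESS"  # stands in for defaults.SUCCESS


def eligible_for_rerun(previous_statuses: dict, step_name: str, restart_initiated: bool) -> bool:
    """True means execute the step again; False means mock it as already successful."""
    if restart_initiated:
        # Once any step has been re-executed, everything downstream re-runs as well.
        return True
    if step_name not in previous_statuses:
        # No step log in the previous run: treat the step as never run and execute it.
        return True
    # Only a previously successful step is skipped.
    return previous_statuses[step_name] != SUCCESS


previous = {"fetch": "SUCCESS", "transform": "FAIL"}
assert eligible_for_rerun(previous, "fetch", restart_initiated=False) is False      # skipped, mocked
assert eligible_for_rerun(previous, "transform", restart_initiated=False) is True   # failed before, re-run
assert eligible_for_rerun(previous, "publish", restart_initiated=True) is True      # downstream of a re-run
```

Terminal `success`/`fail` nodes bypass this check in `execute_from_graph`; composite (`parallel`/`dag`/`map`) nodes go through it before delegating to `execute_as_graph`.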
File without changes
File without changes
extensions/run_log_store/chunked_fs.py (new file)
@@ -0,0 +1,113 @@
import json
import logging
from pathlib import Path
from string import Template
from typing import Any, Dict, Optional, Sequence, Union

from extensions.run_log_store.generic_chunked import ChunkedRunLogStore
from runnable import defaults, utils

logger = logging.getLogger(defaults.LOGGER_NAME)

T = Union[str, Path]


class ChunkedFileSystemRunLogStore(ChunkedRunLogStore):
    """
    File system run log store but chunks the run log into thread safe chunks.
    This enables executions to be parallel.
    """

    service_name: str = "chunked-fs"
    log_folder: str = defaults.LOG_LOCATION_FOLDER

    def get_summary(self) -> Dict[str, Any]:
        summary = {"Type": self.service_name, "Location": self.log_folder}

        return summary

    def get_matches(
        self, run_id: str, name: str, multiple_allowed: bool = False
    ) -> Optional[Union[Sequence[T], T]]:
        """
        Get contents of files matching the pattern name*

        Args:
            run_id (str): The run id
            name (str): The suffix of the file name to check in the run log store.
        """
        log_folder = self.log_folder_with_run_id(run_id=run_id)
        sub_name = Template(name).safe_substitute({"creation_time": ""})

        matches = list(log_folder.glob(f"{sub_name}*"))

        if matches:
            if not multiple_allowed:
                if len(matches) > 1:
                    msg = f"Multiple matches found for {name} while multiple is not allowed"
                    raise Exception(msg)
                return matches[0]
            return matches

        return None

    def log_folder_with_run_id(self, run_id: str) -> Path:
        """
        Utility function to get the log folder for a run id.

        Args:
            run_id (str): The run id

        Returns:
            Path: The path to the log folder with the run id
        """
        return Path(self.log_folder) / run_id

    def safe_suffix_json(self, name: Union[Path, str]) -> str:
        """
        Safely attach a suffix to a json file.

        Args:
            name (Path): The name of the file with or without suffix of json

        Returns:
            str: The name of the file with .json
        """
        if str(name).endswith("json"):
            return str(name)

        return str(name) + ".json"

    def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
        """
        Store the contents against the name in the folder.

        Args:
            run_id (str): The run id
            contents (dict): The dict to store
            name (str): The name to store as
        """
        if insert:
            name = self.log_folder_with_run_id(run_id=run_id) / name

        utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))

        with open(self.safe_suffix_json(name), "w") as fw:
            json.dump(contents, fw, ensure_ascii=True, indent=4)

    def _retrieve(self, name: Union[str, Path]) -> dict:
        """
        Does the job of retrieving from the folder.

        Args:
            name (str): the name of the file to retrieve

        Returns:
            dict: The contents
        """
        contents: dict = {}

        with open(self.safe_suffix_json(name), "r") as fr:
            contents = json.load(fr)

        return contents
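The point of the chunked store is its on-disk layout: each logical unit of the run log becomes its own small JSON file under a per-run folder, so parallel branches never contend on one big file. The sketch below, not part of the wheel, illustrates that layout with an invented run id and chunk name; the real chunk names come from `ChunkedRunLogStore`, which is not shown in this diff, and `.run_log_store` is taken from the file-system store's docstring further down.

```python
# Illustrative sketch (not part of the package): the on-disk layout used by
# ChunkedFileSystemRunLogStore. The run id and the chunk name are invented.
import json
from pathlib import Path

log_folder = Path(".run_log_store")      # assumed value of defaults.LOG_LOCATION_FOLDER
run_id = "pleasant-otter-1234"

run_folder = log_folder / run_id         # what log_folder_with_run_id() returns
run_folder.mkdir(parents=True, exist_ok=True)

# _store(): one small JSON file per logical unit of the run log.
chunk = run_folder / "RunLog.json"       # safe_suffix_json() appends ".json" when missing
chunk.write_text(json.dumps({"run_id": run_id, "status": "PROCESSING"}, indent=4))

# get_matches(): reads glob for "<name>*" inside the run folder, with any
# ${creation_time} placeholder in the name blanked out first.
print(sorted(p.name for p in run_folder.glob("RunLog*")))
```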
extensions/run_log_store/db/implementation_FF.py (new file)
@@ -0,0 +1,163 @@
import datetime
import json
import logging
from pathlib import Path
from string import Template
from typing import Any, Dict, List, Optional, Union, cast

from runnable import defaults, utils
from runnable.extensions.run_log_store.generic_chunked import ChunkedRunLogStore

logger = logging.getLogger(defaults.LOGGER_NAME)


class DBRunLogStore(ChunkedRunLogStore):
    """
    File system run log store but chunks the run log into thread safe chunks.
    This enables executions to be parallel.
    """

    service_name: str = "chunked-fs"
    connection_string: str
    db_name: str

    _DB_LOG: Any = None
    _engine: Any = None
    _session: Any = None
    _connection_string: str = ""
    _base: Any = None

    def model_post_init(self, _: Any) -> None:
        run_context = self._context

        secrets = cast(Dict[str, str], run_context.secrets_handler.get())
        connection_string = Template(self.connection_string).safe_substitute(**secrets)

        try:
            import sqlalchemy
            from sqlalchemy import Column, DateTime, Integer, Sequence, Text
            from sqlalchemy.orm import declarative_base, sessionmaker

            Base = declarative_base()

            class DBLog(Base):
                """
                Base table for storing run logs in database.

                In this model, we fragment the run log into logical units that are concurrent safe.
                """

                __tablename__ = self.db_name
                pk = Column(Integer, Sequence("id_seq"), primary_key=True)
                run_id = Column(Text, index=True)
                attribute_key = Column(
                    Text
                )  # run_log, step_internal_name, parameter_key etc
                attribute_type = Column(Text)  # RunLog, Step, Branch, Parameter
                attribute_value = Column(Text)  # The JSON string
                created_at = Column(DateTime, default=datetime.datetime.utcnow)

            self._engine = sqlalchemy.create_engine(
                connection_string, pool_pre_ping=True
            )
            self._session = sessionmaker(bind=self._engine)
            self._DB_LOG = DBLog
            self._connection_string = connection_string
            self._base = Base

        except ImportError as _e:
            logger.exception("Unable to import SQLalchemy, is it installed?")
            msg = "SQLAlchemy is required for this extension. Please install it"
            raise Exception(msg) from _e

    def create_tables(self):
        import sqlalchemy

        engine = sqlalchemy.create_engine(self._connection_string)
        self._base.metadata.create_all(engine)

    def get_matches(
        self, run_id: str, name: str, multiple_allowed: bool = False
    ) -> Optional[Union[List[Path], Path]]:
        """
        Get contents of files matching the pattern name*

        Args:
            run_id (str): The run id
            name (str): The suffix of the file name to check in the run log store.
        """
        log_folder = self.log_folder_with_run_id(run_id=run_id)

        sub_name = Template(name).safe_substitute({"creation_time": ""})

        matches = list(log_folder.glob(f"{sub_name}*"))
        if matches:
            if not multiple_allowed:
                if len(matches) > 1:
                    msg = f"Multiple matches found for {name} while multiple is not allowed"
                    raise Exception(msg)
                return matches[0]
            return matches

        return None

    def log_folder_with_run_id(self, run_id: str) -> Path:
        """
        Utility function to get the log folder for a run id.

        Args:
            run_id (str): The run id

        Returns:
            Path: The path to the log folder with the run id
        """
        return Path(self.log_folder) / run_id

    def safe_suffix_json(self, name: Union[Path, str]) -> str:
        """
        Safely attach a suffix to a json file.

        Args:
            name (Path): The name of the file with or without suffix of json

        Returns:
            str: The name of the file with .json
        """
        if str(name).endswith("json"):
            return str(name)

        return str(name) + ".json"

    def _store(self, run_id: str, contents: dict, name: Union[Path, str], insert=False):
        """
        Store the contents against the name in the folder.

        Args:
            run_id (str): The run id
            contents (dict): The dict to store
            name (str): The name to store as
        """
        if insert:
            name = self.log_folder_with_run_id(run_id=run_id) / name

        utils.safe_make_dir(self.log_folder_with_run_id(run_id=run_id))

        with open(self.safe_suffix_json(name), "w") as fw:
            json.dump(contents, fw, ensure_ascii=True, indent=4)

    def _retrieve(self, name: Path) -> dict:
        """
        Does the job of retrieving from the folder.

        Args:
            name (str): the name of the file to retrieve

        Returns:
            dict: The contents
        """
        contents: dict = {}

        with open(self.safe_suffix_json(name), "r") as fr:
            contents = json.load(fr)

        return contents
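One detail worth calling out in `model_post_init` is how the connection string is resolved: any `${...}` placeholder in the configured string is filled from the secrets handler before the SQLAlchemy engine is created. A small sketch, not part of the wheel, with an invented URL and secret name:

```python
# Illustrative sketch (not part of the package): how DBRunLogStore resolves its
# connection string. The URL and the secret name are invented.
from string import Template

configured = "postgresql://runnable:${db_password}@db.internal:5432/run_logs"
secrets = {"db_password": "s3cr3t"}  # shape of run_context.secrets_handler.get() is assumed

resolved = Template(configured).safe_substitute(**secrets)
print(resolved)  # postgresql://runnable:s3cr3t@db.internal:5432/run_logs

# safe_substitute leaves unknown placeholders in place instead of raising, so a
# missing secret surfaces later as a failed connection rather than a KeyError here.
```

Note that this module keeps the `chunked-fs` service name and the file-system `get_matches`/`_store`/`_retrieve` methods, which reference a `log_folder` this class never defines; together with the `_FF` suffix, that suggests the database-backed store is feature-flagged and not yet wired up end to end.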
File without changes
extensions/run_log_store/file_system.py (new file)
@@ -0,0 +1,145 @@
import json
import logging
from pathlib import Path
from typing import Any, Dict

from runnable import defaults, exceptions, utils
from runnable.datastore import BaseRunLogStore, RunLog

logger = logging.getLogger(defaults.LOGGER_NAME)


class FileSystemRunLogstore(BaseRunLogStore):
    """
    In this type of Run Log store, we use a file system to store the JSON run log.

    Every single run is stored as a different file which makes it compatible across other store types.

    When to use:
        When locally testing a pipeline and you need to compare across runs.
        It's fully featured and perfectly fine if your local environment is where you would do everything.

    Do not use:
        If you need parallelization on local, this run log would not support it.

    Example config:

        run_log:
          type: file-system
          config:
            log_folder: The folder to put the logs. Defaults to .run_log_store

    """

    service_name: str = "file-system"
    log_folder: str = defaults.LOG_LOCATION_FOLDER

    @property
    def log_folder_name(self):
        return self.log_folder

    def get_summary(self) -> Dict[str, Any]:
        summary = {"Type": self.service_name, "Location": self.log_folder}

        return summary

    def write_to_folder(self, run_log: RunLog):
        """
        Write the run log to the folder.

        Args:
            run_log (RunLog): The run log to be added to the database
        """
        write_to = self.log_folder_name
        utils.safe_make_dir(write_to)

        write_to_path = Path(write_to)
        run_id = run_log.run_id
        json_file_path = write_to_path / f"{run_id}.json"

        with json_file_path.open("w") as fw:
            json.dump(run_log.model_dump(), fw, ensure_ascii=True, indent=4)  # pylint: disable=no-member

    def get_from_folder(self, run_id: str) -> RunLog:
        """
        Look into the run log folder for the run log for the run id.

        If the run log does not exist, raise an exception. If it does, decode it
        as a RunLog and return it.

        Args:
            run_id (str): The requested run id to retrieve the run log store

        Raises:
            FileNotFoundError: If the Run Log has not been found.

        Returns:
            RunLog: The decoded Run log
        """
        write_to = self.log_folder_name

        read_from_path = Path(write_to)
        json_file_path = read_from_path / f"{run_id}.json"

        if not json_file_path.exists():
            raise FileNotFoundError(f"Expected {json_file_path} is not present")

        with json_file_path.open("r") as fr:
            json_str = json.load(fr)
            run_log = RunLog(**json_str)  # pylint: disable=no-member
        return run_log

    def create_run_log(
        self,
        run_id: str,
        dag_hash: str = "",
        use_cached: bool = False,
        tag: str = "",
        original_run_id: str = "",
        status: str = defaults.CREATED,
    ) -> RunLog:
        """
        # Creates a Run log
        # Adds it to the db
        """

        try:
            self.get_run_log_by_id(run_id=run_id, full=False)
            raise exceptions.RunLogExistsError(run_id=run_id)
        except exceptions.RunLogNotFoundError:
            pass

        logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
        run_log = RunLog(
            run_id=run_id,
            dag_hash=dag_hash,
            tag=tag,
            status=status,
        )
        self.write_to_folder(run_log)
        return run_log

    def get_run_log_by_id(
        self,
        run_id: str,
        full: bool = False,
    ) -> RunLog:
        """
        # Returns the run_log defined by id
        # Raises Exception if not found
        """
        try:
            logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
            run_log = self.get_from_folder(run_id)
            return run_log
        except FileNotFoundError as e:
            raise exceptions.RunLogNotFoundError(run_id) from e

    def put_run_log(self, run_log: RunLog):
        """
        # Puts the run_log into the database
        """
        logger.info(
            f"{self.service_name} Putting the run log in the DB: {run_log.run_id}"
        )
        self.write_to_folder(run_log)
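To see the file-system store end to end, here is a short usage sketch, not part of the wheel, that assumes `FileSystemRunLogstore` can be constructed directly with just `log_folder`, outside a full runnable run context; the run id is invented.

```python
# Illustrative sketch (not part of the package): round-trip a run log through the
# file-system store. Direct construction outside a run context is assumed to work.
from extensions.run_log_store.file_system import FileSystemRunLogstore

store = FileSystemRunLogstore(log_folder=".run_log_store")  # config key from the docstring above

run_log = store.create_run_log(run_id="demo-run-0001")      # writes .run_log_store/demo-run-0001.json
fetched = store.get_run_log_by_id(run_id="demo-run-0001")

print(fetched.run_id, fetched.status)                       # "demo-run-0001", defaults.CREATED
```

Because each run is a single JSON file, concurrent branches would race on that file on every `put_run_log`; that is the limitation the chunked store above avoids by splitting the run log into per-unit files.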