runnable 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- runnable/__init__.py +34 -0
- runnable/catalog.py +141 -0
- runnable/cli.py +272 -0
- runnable/context.py +34 -0
- runnable/datastore.py +687 -0
- runnable/defaults.py +182 -0
- runnable/entrypoints.py +448 -0
- runnable/exceptions.py +94 -0
- runnable/executor.py +421 -0
- runnable/experiment_tracker.py +139 -0
- runnable/extensions/catalog/__init__.py +21 -0
- runnable/extensions/catalog/file_system/__init__.py +0 -0
- runnable/extensions/catalog/file_system/implementation.py +227 -0
- runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
- runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
- runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
- runnable/extensions/executor/__init__.py +725 -0
- runnable/extensions/executor/argo/__init__.py +0 -0
- runnable/extensions/executor/argo/implementation.py +1183 -0
- runnable/extensions/executor/argo/specification.yaml +51 -0
- runnable/extensions/executor/k8s_job/__init__.py +0 -0
- runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
- runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
- runnable/extensions/executor/local/__init__.py +0 -0
- runnable/extensions/executor/local/implementation.py +70 -0
- runnable/extensions/executor/local_container/__init__.py +0 -0
- runnable/extensions/executor/local_container/implementation.py +361 -0
- runnable/extensions/executor/mocked/__init__.py +0 -0
- runnable/extensions/executor/mocked/implementation.py +189 -0
- runnable/extensions/experiment_tracker/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
- runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
- runnable/extensions/nodes.py +655 -0
- runnable/extensions/run_log_store/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
- runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
- runnable/extensions/run_log_store/db/integration_FF.py +0 -0
- runnable/extensions/run_log_store/file_system/__init__.py +0 -0
- runnable/extensions/run_log_store/file_system/implementation.py +136 -0
- runnable/extensions/run_log_store/generic_chunked.py +541 -0
- runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
- runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
- runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
- runnable/extensions/secrets/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/__init__.py +0 -0
- runnable/extensions/secrets/dotenv/implementation.py +100 -0
- runnable/extensions/secrets/env_secrets/__init__.py +0 -0
- runnable/extensions/secrets/env_secrets/implementation.py +42 -0
- runnable/graph.py +464 -0
- runnable/integration.py +205 -0
- runnable/interaction.py +404 -0
- runnable/names.py +546 -0
- runnable/nodes.py +501 -0
- runnable/parameters.py +183 -0
- runnable/pickler.py +102 -0
- runnable/sdk.py +472 -0
- runnable/secrets.py +95 -0
- runnable/tasks.py +395 -0
- runnable/utils.py +630 -0
- runnable-0.3.0.dist-info/METADATA +437 -0
- runnable-0.3.0.dist-info/RECORD +69 -0
- {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/WHEEL +1 -1
- runnable-0.3.0.dist-info/entry_points.txt +44 -0
- runnable-0.1.0.dist-info/METADATA +0 -16
- runnable-0.1.0.dist-info/RECORD +0 -6
- /runnable/{.gitkeep → extensions/__init__.py} +0 -0
- {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/LICENSE +0 -0
runnable/datastore.py
ADDED
@@ -0,0 +1,687 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
from abc import ABC, abstractmethod
|
5
|
+
from typing import Any, Dict, List, Optional, OrderedDict, Tuple, Union
|
6
|
+
|
7
|
+
from pydantic import BaseModel, Field
|
8
|
+
|
9
|
+
import runnable.context as context
|
10
|
+
from runnable import defaults, exceptions
|
11
|
+
|
12
|
+
logger = logging.getLogger(defaults.LOGGER_NAME)
|
13
|
+
|
14
|
+
# Once defined these classes are sealed to any additions unless a default is provided
|
15
|
+
# Breaking this rule might make runnable backward-incompatible
|
16
|
+
|
17
|
+
|
18
|
+
class DataCatalog(BaseModel, extra="allow"):
    """
    A record describing a single item that went through the catalog.

    Identity (hashing and equality) is based on ``name`` alone, so two records
    with the same name are treated as the same item in sets and dict keys even
    when their other attributes differ.
    """

    name: str  # Name of the dataset
    data_hash: str = ""  # sha1 hash of the file
    catalog_relative_path: str = ""  # File path relative to the catalog location
    catalog_handler_location: str = ""  # Location of the catalog
    stage: str = ""  # Stage at which the item was recorded: get, put etc

    def __hash__(self):
        """
        Hash on the dataset name so that DataCatalog objects can be uniquified.
        """
        return hash(self.name)

    def __eq__(self, other):
        """
        Two DataCatalog objects are equal when their names match.

        Anything that is not a DataCatalog compares unequal.
        """
        return isinstance(other, DataCatalog) and other.name == self.name
|
43
|
+
|
44
|
+
|
45
|
+
class StepAttempt(BaseModel):
    """
    The attributes recorded for one attempt at executing a step.

    Every field has a default, so an empty attempt can be created and filled
    in afterwards.
    """

    attempt_number: int = 0
    start_time: str = ""
    end_time: str = ""
    duration: str = ""  # end_time - start_time
    status: str = "FAIL"
    message: str = ""
    # Fresh dict per instance; never shared between attempts.
    input_parameters: Dict[str, Any] = Field(default_factory=dict)
    output_parameters: Dict[str, Any] = Field(default_factory=dict)
|
58
|
+
|
59
|
+
|
60
|
+
class CodeIdentity(BaseModel, extra="allow"):
    """
    The attributes identifying the code that a step ran with.

    Extra attributes beyond the declared ones are accepted.
    """

    # GIT sha code or docker image id
    code_identifier: Optional[str] = ""
    # git or docker
    code_identifier_type: Optional[str] = ""
    # If git, checks if the tree is clean.
    code_identifier_dependable: Optional[bool] = False
    # The git remote url or docker repository url
    code_identifier_url: Optional[str] = ""
    # Any optional message
    code_identifier_message: Optional[str] = ""
|
70
|
+
|
71
|
+
|
72
|
+
class StepLog(BaseModel):
    """
    The data recorded for a single step of a run.

    A step may own branches (``branches``), each of which holds further step
    logs, so step logs and branch logs nest inside each other.
    """

    name: str
    internal_name: str  # Should be the dot notation of the step
    status: str = "FAIL"  # Should have a better default
    step_type: str = "task"
    message: str = ""
    mock: bool = False
    code_identities: List[CodeIdentity] = Field(default_factory=list)
    attempts: List[StepAttempt] = Field(default_factory=list)
    user_defined_metrics: Dict[str, Any] = Field(default_factory=dict)
    branches: Dict[str, BranchLog] = Field(default_factory=dict)
    data_catalog: List[DataCatalog] = Field(default_factory=list)

    def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
        """
        Collect the catalog items of this step, and of every branch under it,
        that were recorded at the given stage.

        Args:
            stage (str, optional): Either 'get' or 'put'. Defaults to 'put'.

        Raises:
            Exception: If the stage is neither 'get' nor 'put'.

        Returns:
            List[DataCatalog]: This step's own matching items first, followed
                by the matching items of its branches.
        """
        if stage not in ["get", "put"]:
            raise Exception("Stage should be in get or put")

        from_branches = []
        for branch_log in self.branches.values():
            from_branches.extend(branch_log.get_data_catalogs_by_stage(stage=stage))

        own_items = [item for item in self.data_catalog if item.stage == stage]
        return own_items + from_branches

    def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
        """
        Append the given catalog items to this step's data catalog.

        Args:
            data_catalogs ([DataCatalog]): A list of data catalog items
        """
        # Start from a fresh list when nothing has been recorded yet.
        if not self.data_catalog:
            self.data_catalog = []

        self.data_catalog.extend(data_catalogs)
|
124
|
+
|
125
|
+
|
126
|
+
class BranchLog(BaseModel):
    """
    The data recorded for one branch of a composite node.

    A branch is an ordered collection of step logs keyed by name.
    """

    internal_name: str
    status: str = "FAIL"
    steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)

    def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
        """
        Collect the catalog items recorded at the given stage by every step
        of this branch.

        Args:
            stage (str, optional): Either 'get' or 'put'. Defaults to 'put'.

        Raises:
            Exception: If the stage is neither 'get' nor 'put'.

        Returns:
            List[DataCatalog]: The matching items, in step order.
        """
        if stage not in ["get", "put"]:
            raise Exception("Stage should be in get or put")

        return [
            item
            for step_log in self.steps.values()
            for item in step_log.get_data_catalogs_by_stage(stage=stage)
        ]
|
159
|
+
|
160
|
+
|
161
|
+
# StepLog.branches refers to BranchLog, which is only defined after StepLog; rebuild the model so the reference resolves
|
162
|
+
StepLog.model_rebuild()
|
163
|
+
|
164
|
+
|
165
|
+
class RunLog(BaseModel):
    """
    The data captured as part of Run Log.

    The run log is the root of the log tree: it holds the top level step logs,
    each of which may hold branch logs that in turn hold more step logs.
    """

    run_id: str
    dag_hash: Optional[str] = None
    use_cached: bool = False
    tag: Optional[str] = ""
    original_run_id: Optional[str] = ""
    status: str = defaults.FAIL
    steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
    parameters: Dict[str, Any] = Field(default_factory=dict)
    run_config: Dict[str, Any] = Field(default_factory=dict)

    def get_data_catalogs_by_stage(self, stage: str = "put") -> List[DataCatalog]:
        """
        Return all the cataloged data by the stage at which they were cataloged.

        Items are de-duplicated using DataCatalog equality; the first
        occurrence of each item is kept and the first-seen order is preserved,
        so the result is the same from one process to the next.

        Args:
            stage (str, optional): Either 'get' or 'put'. Defaults to 'put'.

        Raises:
            Exception: If stage was not either put or get.

        Returns:
            List[DataCatalog]: The unique catalog items of the whole run.
        """
        if stage not in ["get", "put"]:
            raise Exception("Only get or put are allowed in stage")

        data_catalogs: List[DataCatalog] = []
        for step in self.steps.values():
            data_catalogs.extend(step.get_data_catalogs_by_stage(stage=stage))

        # dict keys are unique and insertion ordered; a set would hand the
        # items back in an order that depends on string hashing.
        return list(dict.fromkeys(data_catalogs))

    def search_branch_by_internal_name(self, i_name: str) -> Tuple[Union[BranchLog, RunLog], Union[StepLog, None]]:
        """
        Given a branch internal name, search for it in the run log.

        If the branch internal name is empty, it is the run log itself.

        The internal name is a dot path that alternates step.branch.step.branch;
        every prefix of the path is used as the key at its level.

        Args:
            i_name (str): The dot path internal name of the branch.

        Raises:
            exceptions.BranchLogNotFoundError: If the walk ends without having
                visited both a branch and a step.
            KeyError: NOTE(review): a prefix missing from the step or branch
                mappings surfaces as the plain lookup error, not as
                BranchLogNotFoundError - confirm this is what callers expect.

        Returns:
            Tuple[BranchLog, StepLog]: The last branch visited and the last
                step visited; (run log, None) for an empty name.
        """
        # internal name is null for base dag
        if not i_name:
            return self, None

        dot_path = i_name.split(".")

        current_steps = self.steps
        current_step = None
        current_branch = None

        # depth counts how many parts of the dot path form the current key:
        # odd depths name a step, even depths name a branch. We start at a step.
        for depth in range(1, len(dot_path) + 1):
            key = ".".join(dot_path[:depth])
            if depth % 2 == 0:
                # Get the branch that holds the next level of steps
                current_branch = current_step.branches[key]  # type: ignore
                current_steps = current_branch.steps
                # Lazy %s args: the log objects are only rendered when DEBUG is on
                logger.debug("Finding branch %s in branch: %s", i_name, current_branch)
            else:
                # Get the step that holds the branch
                current_step = current_steps[key]
                logger.debug("Finding branch for %s in step: %s", i_name, current_step)

        logger.debug("current branch : %s, current step %s", current_branch, current_step)
        if current_branch and current_step:
            return current_branch, current_step

        raise exceptions.BranchLogNotFoundError(self.run_id, i_name)

    def search_step_by_internal_name(self, i_name: str) -> Tuple[StepLog, Union[BranchLog, None]]:
        """
        Given a steps internal name, search for the step log.

        If the name has a single part when split against '.', the step is
        looked up directly in the run log.

        Args:
            i_name (str): The dot path internal name of the step.

        Raises:
            exceptions.StepLogNotFoundError: If the walk ends without having
                visited both a branch and a step.
            KeyError: NOTE(review): a name or prefix missing from the step or
                branch mappings surfaces as the plain lookup error, not as
                StepLogNotFoundError - confirm this is what callers expect.

        Returns:
            Tuple[StepLog, BranchLog]: The last step visited and the last
                branch visited; the branch is None for a top level step.
        """
        dot_path = i_name.split(".")
        if len(dot_path) == 1:
            return self.steps[i_name], None

        current_steps = self.steps
        current_step = None
        current_branch = None

        # Same alternation as for branches: odd depths are steps, even depths
        # are branches.
        for depth in range(1, len(dot_path) + 1):
            key = ".".join(dot_path[:depth])
            if depth % 2 == 0:
                current_branch = current_step.branches[key]  # type: ignore
                current_steps = current_branch.steps
                # Lazy %s args: the log objects are only rendered when DEBUG is on
                logger.debug("Finding step log for %s in branch: %s", i_name, current_branch)
            else:
                current_step = current_steps[key]
                logger.debug("Finding step log for %s in step: %s", i_name, current_step)

        logger.debug("current branch : %s, current step %s", current_branch, current_step)
        if current_branch and current_step:
            return current_step, current_branch

        raise exceptions.StepLogNotFoundError(self.run_id, i_name)
|
284
|
+
|
285
|
+
|
286
|
+
# All outside modules should interact with dataclasses using the RunLogStore to promote extensibility
|
287
|
+
# If you want to customize dataclass, extend BaseRunLogStore and implement the methods as per the specification
|
288
|
+
|
289
|
+
|
290
|
+
class BaseRunLogStore(ABC, BaseModel):
    """
    The base class of a Run Log Store with many common methods implemented.

    Implementations provide create_run_log, get_run_log_by_id and put_run_log;
    every other method here is written in terms of those three.
    """

    service_name: str = ""
    service_type: str = "run_log_store"

    @property
    def _context(self):
        # The run context is read from the context module on every access.
        return context.run_context

    @abstractmethod
    def create_run_log(
        self,
        run_id: str,
        dag_hash: str = "",
        use_cached: bool = False,
        tag: str = "",
        original_run_id: str = "",
        status: str = defaults.CREATED,
        **kwargs,
    ):
        """
        Creates a Run Log object by using the config

        Logically the method should do the following:
            * Creates a Run log
            * Adds it to the db
            * Return the log

        Raises:
            NotImplementedError: This is a base class and therefore has no default implementation
        """
        raise NotImplementedError

    @abstractmethod
    def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog:
        """
        Retrieves a Run log from the database using the config and the run_id

        Logically the method should:
            * Returns the run_log defined by id from the data store defined by the config

        Args:
            run_id (str): The run_id of the run
            full (bool): return the full run log store or only the RunLog object

        Returns:
            RunLog: The RunLog object identified by the run_id

        Raises:
            NotImplementedError: This is a base class and therefore has no default implementation
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
        """
        raise NotImplementedError

    @abstractmethod
    def put_run_log(self, run_log: RunLog, **kwargs):
        """
        Puts the Run Log in the database as defined by the config

        Logically the method should:
            Puts the run_log into the database

        Args:
            run_log (RunLog): The Run log of the run

        Raises:
            NotImplementedError: This is a base class and therefore has no default implementation
        """
        raise NotImplementedError

    def update_run_log_status(self, run_id: str, status: str):
        """
        Read the run log of run_id, set its status and write it back.

        Args:
            run_id (str): The run_id of the run
            status (str): The new status of the run
        """
        logger.info(f"Updating status of run_id {run_id} to {status}")
        stored = self.get_run_log_by_id(run_id, full=False)
        stored.status = status
        self.put_run_log(stored)

    def get_parameters(self, run_id: str, **kwargs) -> dict:
        """
        Get the parameters from the Run log defined by the run_id

        Args:
            run_id (str): The run_id of the run

        Returns:
            dict: The parameters dictionary held by the run log

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
        """
        return self.get_run_log_by_id(run_id=run_id).parameters

    def set_parameters(self, run_id: str, parameters: dict, **kwargs):
        """
        Merge the given parameters into the parameters of the Run log.

        A parameter that already exists in the run log is over-written; the
        run log is then written back with put_run_log.

        Args:
            run_id (str): The run_id of the run
            parameters (dict): The parameters to update in the run log

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
        """
        stored = self.get_run_log_by_id(run_id=run_id)
        stored.parameters.update(parameters)
        self.put_run_log(run_log=stored)

    def get_run_config(self, run_id: str, **kwargs) -> dict:
        """
        Given a run_id, return the run_config used to perform the run.

        Args:
            run_id (str): The run_id of the run

        Returns:
            dict: The run config held by the run log
        """
        return self.get_run_log_by_id(run_id=run_id).run_config

    def set_run_config(self, run_id: str, run_config: dict, **kwargs):
        """
        Merge the given run config into the run config of the Run log and
        write the run log back.

        Args:
            run_id (str): The run_id of the run
            run_config (dict): The run_config of the run
        """
        stored = self.get_run_log_by_id(run_id=run_id)
        stored.run_config.update(run_config)
        self.put_run_log(run_log=stored)

    def create_step_log(self, name: str, internal_name: str, **kwargs):
        """
        Create a step log by the name and internal name

        The step log is only constructed here; it is not added to any run log.
        This method is just an interface for external modules to create a step log.

        Args:
            name (str): The friendly name of the step log
            internal_name (str): The internal naming of the step log. The internal naming is a dot path convention

        Returns:
            StepLog: A uncommitted step log object
        """
        logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
        return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)

    def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog:
        """
        Get a step log from the datastore for run_id and the internal naming of the step log

        The internal naming of the step log is a dot path convention; the run
        log is fetched and searched by that name.

        Args:
            internal_name (str): The internal name of the step log
            run_id (str): The run_id of the run

        Returns:
            StepLog: The step log object for the step defined by the internal naming and run_id

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
            StepLogNotFoundError: If the step log for internal_name is not found in the datastore for run_id
        """
        logger.info(f"{self.service_name} Getting the step log: {internal_name} of {run_id}")
        stored = self.get_run_log_by_id(run_id=run_id)
        return stored.search_step_by_internal_name(internal_name)[0]

    def add_step_log(self, step_log: StepLog, run_id: str, **kwargs):
        """
        Add the step log in the run log as identified by the run_id in the datastore

        The parent of the step is the internal name of the step log without
        its last dot separated part; the step log is stored under that parent,
        keyed by its full internal name, and the run log is written back.

        Args:
            step_log (StepLog): The Step log to add to the database
            run_id (str): The run id of the run

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
            BranchLogNotFoundError: If the branch of the step log for internal_name is not found in the datastore
            for run_id
        """
        logger.info(f"{self.service_name} Adding the step log to DB: {step_log.name}")
        stored = self.get_run_log_by_id(run_id=run_id)

        # Everything before the last '.' names the parent; empty for a top level step.
        parent_name = step_log.internal_name.rpartition(".")[0]
        found, _ = stored.search_branch_by_internal_name(parent_name)

        # Fall back to the run log itself when the search hands back nothing.
        parent = stored if found is None else found
        parent.steps[step_log.internal_name] = step_log
        self.put_run_log(run_log=stored)

    def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog:
        """
        Creates a uncommitted branch log object by the internal name given

        Args:
            internal_branch_name (str): Creates a branch log by name internal_branch_name

        Returns:
            BranchLog: Uncommitted and initialized with defaults BranchLog object
        """
        logger.info(f"{self.service_name} Creating a Branch Log : {internal_branch_name}")
        return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)

    def get_branch_log(self, internal_branch_name: str, run_id: str, **kwargs) -> Union[BranchLog, RunLog]:
        """
        Returns the branch log by the internal branch name for the run id

        If the internal branch name is empty, returns the run log

        Args:
            internal_branch_name (str): The internal branch name to retrieve.
            run_id (str): The run id of interest

        Returns:
            BranchLog: The branch log or the run log as requested.
        """
        stored = self.get_run_log_by_id(run_id=run_id)
        if internal_branch_name:
            return stored.search_branch_by_internal_name(internal_branch_name)[0]
        return stored

    def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs):
        """
        Store a branch log under the step that owns it and write the run log back.

        The owning step is the internal name of the branch without its last
        dot separated part.

        The branch log could some times be a Run log; in that case, or when the
        branch has no internal name, the object is handed to put_run_log as is.

        Args:
            branch_log (BranchLog): The branch log/run log to add to the database
            run_id (str): The run id to which the branch/run log is added
        """
        internal_branch_name = branch_log.internal_name if isinstance(branch_log, BranchLog) else None

        if not internal_branch_name:
            self.put_run_log(branch_log)  # type: ignore # We are dealing with base dag here
            return

        stored = self.get_run_log_by_id(run_id=run_id)

        owning_step_name = internal_branch_name.rpartition(".")[0]
        owning_step, _ = stored.search_step_by_internal_name(owning_step_name)

        owning_step.branches[internal_branch_name] = branch_log  # type: ignore
        self.put_run_log(stored)

    def create_attempt_log(self, **kwargs) -> StepAttempt:
        """
        Returns an uncommitted step attempt log.

        Returns:
            StepAttempt: An uncommitted step attempt log
        """
        logger.info(f"{self.service_name} Creating an attempt log")
        return StepAttempt()

    def create_code_identity(self, **kwargs) -> CodeIdentity:
        """
        Creates an uncommitted Code identity class

        Returns:
            CodeIdentity: An uncommitted code identity class
        """
        logger.info(f"{self.service_name} Creating Code identity")
        return CodeIdentity()

    def create_data_catalog(self, name: str, **kwargs) -> DataCatalog:
        """
        Create a uncommitted data catalog object

        Args:
            name (str): The name of the data catalog item to put

        Returns:
            DataCatalog: The DataCatalog object.
        """
        logger.info(f"{self.service_name} Creating Data Catalog for {name}")
        return DataCatalog(name=name)
|
615
|
+
|
616
|
+
|
617
|
+
class BufferRunLogstore(BaseRunLogStore):
    """
    A in-memory run log store.

    The single run log is held on the ``run_log`` attribute of this object,
    so nothing is persisted.

    When to use:
        When testing some part of the pipeline.

    Do not use:
        When you need to compare between runs or in production set up

    This Run Log Store is concurrent write safe as it is in memory

    Example config:
        run_log:
          type: buffered

    """

    service_name: str = "buffered"
    run_log: Optional[RunLog] = Field(default=None, exclude=True)  # For a buffered Run Log, this is the database

    def create_run_log(
        self,
        run_id: str,
        dag_hash: str = "",
        use_cached: bool = False,
        tag: str = "",
        original_run_id: str = "",
        status: str = defaults.CREATED,
        **kwargs,
    ) -> RunLog:
        """
        Build a new run log, keep it as the buffered run log and return it.

        Any run log held previously is replaced.

        Refer to BaseRunLogStore.create_run_log
        """
        logger.info(f"{self.service_name} Creating a Run Log and adding it to DB")
        created = RunLog(
            run_id=run_id,
            dag_hash=dag_hash,
            use_cached=use_cached,
            tag=tag,
            original_run_id=original_run_id,
            status=status,
        )
        self.run_log = created
        return self.run_log

    def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs):
        """
        Return the buffered run log.

        The run_id is only used for logging and for the error; whichever run
        log is currently buffered is returned.

        Raises:
            exceptions.RunLogNotFoundError: If no run log is buffered.
        """
        logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
        if not self.run_log:
            raise exceptions.RunLogNotFoundError(run_id)

        return self.run_log

    def put_run_log(self, run_log: RunLog, **kwargs):
        """
        Make the given run log the buffered run log, replacing any previous one.
        """
        logger.info(f"{self.service_name} Putting the run log in the DB: {run_log.run_id}")
        self.run_log = run_log
|