runnable 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. runnable/__init__.py +34 -0
  2. runnable/catalog.py +141 -0
  3. runnable/cli.py +272 -0
  4. runnable/context.py +34 -0
  5. runnable/datastore.py +687 -0
  6. runnable/defaults.py +182 -0
  7. runnable/entrypoints.py +448 -0
  8. runnable/exceptions.py +94 -0
  9. runnable/executor.py +421 -0
  10. runnable/experiment_tracker.py +139 -0
  11. runnable/extensions/catalog/__init__.py +21 -0
  12. runnable/extensions/catalog/file_system/__init__.py +0 -0
  13. runnable/extensions/catalog/file_system/implementation.py +227 -0
  14. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  15. runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
  16. runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
  17. runnable/extensions/executor/__init__.py +725 -0
  18. runnable/extensions/executor/argo/__init__.py +0 -0
  19. runnable/extensions/executor/argo/implementation.py +1183 -0
  20. runnable/extensions/executor/argo/specification.yaml +51 -0
  21. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  22. runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
  23. runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
  24. runnable/extensions/executor/local/__init__.py +0 -0
  25. runnable/extensions/executor/local/implementation.py +70 -0
  26. runnable/extensions/executor/local_container/__init__.py +0 -0
  27. runnable/extensions/executor/local_container/implementation.py +361 -0
  28. runnable/extensions/executor/mocked/__init__.py +0 -0
  29. runnable/extensions/executor/mocked/implementation.py +189 -0
  30. runnable/extensions/experiment_tracker/__init__.py +0 -0
  31. runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
  32. runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
  33. runnable/extensions/nodes.py +655 -0
  34. runnable/extensions/run_log_store/__init__.py +0 -0
  35. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  36. runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
  37. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  38. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
  39. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
  40. runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
  41. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  42. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  43. runnable/extensions/run_log_store/file_system/implementation.py +136 -0
  44. runnable/extensions/run_log_store/generic_chunked.py +541 -0
  45. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  46. runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
  47. runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
  48. runnable/extensions/secrets/__init__.py +0 -0
  49. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  50. runnable/extensions/secrets/dotenv/implementation.py +100 -0
  51. runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  52. runnable/extensions/secrets/env_secrets/implementation.py +42 -0
  53. runnable/graph.py +464 -0
  54. runnable/integration.py +205 -0
  55. runnable/interaction.py +404 -0
  56. runnable/names.py +546 -0
  57. runnable/nodes.py +501 -0
  58. runnable/parameters.py +183 -0
  59. runnable/pickler.py +102 -0
  60. runnable/sdk.py +472 -0
  61. runnable/secrets.py +95 -0
  62. runnable/tasks.py +395 -0
  63. runnable/utils.py +630 -0
  64. runnable-0.3.0.dist-info/METADATA +437 -0
  65. runnable-0.3.0.dist-info/RECORD +69 -0
  66. {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/WHEEL +1 -1
  67. runnable-0.3.0.dist-info/entry_points.txt +44 -0
  68. runnable-0.1.0.dist-info/METADATA +0 -16
  69. runnable-0.1.0.dist-info/RECORD +0 -6
  70. /runnable/{.gitkeep → extensions/__init__.py} +0 -0
  71. {runnable-0.1.0.dist-info → runnable-0.3.0.dist-info}/LICENSE +0 -0
runnable/datastore.py ADDED
@@ -0,0 +1,687 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Dict, List, Optional, OrderedDict, Tuple, Union
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+ import runnable.context as context
10
+ from runnable import defaults, exceptions
11
+
12
+ logger = logging.getLogger(defaults.LOGGER_NAME)
13
+
14
+ # Once defined, these classes are sealed: a new field may only be added if it has a default value.
15
+ # Breaking this rule might make runnable backward incompatible.
16
+
17
+
18
class DataCatalog(BaseModel, extra="allow"):
    """
    A single item recorded against the catalog.

    Identity is defined by ``name`` alone: two items with the same name are equal and
    hash alike, whatever their other fields hold. Fields that are not declared here are
    accepted as well (``extra="allow"``).
    """

    # Name of the dataset; the only mandatory field.
    name: str
    # sha1 hash of the file.
    data_hash: str = ""
    # Path of the file, relative to the catalog location.
    catalog_relative_path: str = ""
    # Location of the catalog.
    catalog_handler_location: str = ""
    # Stage at which the item was recorded (get, put, ...).
    stage: str = ""

    def __hash__(self):
        """
        Hash on the name only, so set operations de-duplicate items by dataset name.
        """
        return hash(self.name)

    def __eq__(self, other):
        """
        Compare by name only; anything that is not a DataCatalog is never equal.
        """
        return isinstance(other, DataCatalog) and other.name == self.name
43
+
44
+
45
class StepAttempt(BaseModel):
    """
    What is recorded about one attempt at executing a step.

    Every field has a default, so an empty attempt can be created and filled in later.
    """

    attempt_number: int = 0

    # Timing information, stored as strings; duration is end_time - start_time.
    start_time: str = ""
    end_time: str = ""
    duration: str = ""

    # An attempt starts out as "FAIL" until it is updated.
    status: str = "FAIL"
    message: str = ""

    # Each instance gets its own dictionaries via default_factory.
    input_parameters: Dict[str, Any] = Field(default_factory=dict)
    output_parameters: Dict[str, Any] = Field(default_factory=dict)
58
+
59
+
60
class CodeIdentity(BaseModel, extra="allow"):
    """
    Identifies the code that a step was executed with.

    All fields are optional and default to empty values; undeclared fields are accepted
    too (``extra="allow"``).
    """

    # The git sha or the docker image id.
    code_identifier: Optional[str] = ""
    # Kind of identifier: git or docker.
    code_identifier_type: Optional[str] = ""
    # For git: whether the working tree was clean.
    code_identifier_dependable: Optional[bool] = False
    # The git remote url or the docker repository url.
    code_identifier_url: Optional[str] = ""
    # Free-form message, if any.
    code_identifier_message: Optional[str] = ""
70
+
71
+
72
class StepLog(BaseModel):
    """
    Everything recorded about one step of a run.

    A step may own branches (``branches``); each of those holds further step logs.
    """

    name: str
    # Expected to be the dot notation of the step.
    internal_name: str
    status: str = "FAIL"
    step_type: str = "task"
    message: str = ""
    mock: bool = False
    code_identities: List[CodeIdentity] = Field(default_factory=list)
    attempts: List[StepAttempt] = Field(default_factory=list)
    user_defined_metrics: Dict[str, Any] = Field(default_factory=dict)
    branches: Dict[str, BranchLog] = Field(default_factory=dict)
    data_catalog: List[DataCatalog] = Field(default_factory=list)

    def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
        """
        Collect the catalog items recorded at ``stage`` by this step and its branches.

        Args:
            stage (str, optional): Either 'get' or 'put'. Defaults to 'put'.

        Raises:
            Exception: If the stage is neither 'get' nor 'put'.

        Returns:
            List[DataCatalog]: This step's own matching items first, followed by the
                matching items of every branch.
        """
        if stage not in ("get", "put"):
            raise Exception("Stage should be in get or put")

        matching = [item for item in self.data_catalog if item.stage == stage]

        # Nothing to walk when the step has no branches.
        for branch in (self.branches or {}).values():
            matching += branch.get_data_catalogs_by_stage(stage=stage)

        return matching

    def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
        """
        Append the given catalog items to this step's data catalog.

        Args:
            data_catalogs (List[DataCatalog]): The catalog items to record.
        """
        # Start from a fresh list when nothing has been recorded so far.
        if not self.data_catalog:
            self.data_catalog = []

        self.data_catalog.extend(data_catalogs)
124
+
125
+
126
class BranchLog(BaseModel):
    """
    What is recorded about one branch of a composite node.

    A branch is an ordered collection of step logs plus a status of its own.
    """

    internal_name: str
    status: str = "FAIL"
    steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)

    def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
        """
        Collect the catalog items recorded at ``stage`` by the steps of this branch.

        Args:
            stage (str, optional): Either 'get' or 'put'. Defaults to 'put'.

        Raises:
            Exception: If the stage is neither 'get' nor 'put'.

        Returns:
            List[DataCatalog]: The matching items of every step, in step order.
        """
        if stage not in ("get", "put"):
            raise Exception("Stage should be in get or put")

        return [
            item
            for step in self.steps.values()
            for item in step.get_data_catalogs_by_stage(stage=stage)
        ]
159
+
160
+
161
+ # StepLog.branches refers to BranchLog, which is declared after StepLog; rebuild the model to resolve that reference
162
+ StepLog.model_rebuild()
163
+
164
+
165
class RunLog(BaseModel):
    """
    The data captured as part of a run log.

    ``steps`` holds the top level step logs; branches hang off the individual step logs
    and are reached through the dot path ("internal name") of a step or a branch.
    """

    run_id: str
    dag_hash: Optional[str] = None
    use_cached: bool = False
    tag: Optional[str] = ""
    original_run_id: Optional[str] = ""
    status: str = defaults.FAIL
    steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
    parameters: Dict[str, Any] = Field(default_factory=dict)
    run_config: Dict[str, Any] = Field(default_factory=dict)

    def get_data_catalogs_by_stage(self, stage: str = "put") -> List[DataCatalog]:
        """
        Return all the cataloged data by the stage at which they were cataloged.

        Items that compare equal (DataCatalog compares by name) are reported once; the
        first occurrence wins and the order in which items were found is kept.

        Args:
            stage (str, optional): Either 'get' or 'put'. Defaults to 'put'.

        Raises:
            Exception: If stage was not either put or get.

        Returns:
            List[DataCatalog]: The de-duplicated catalog items of all the steps.
        """
        if stage not in ["get", "put"]:
            raise Exception("Only get or put are allowed in stage")

        data_catalogs: List[DataCatalog] = []
        for step in self.steps.values():
            data_catalogs.extend(step.get_data_catalogs_by_stage(stage=stage))

        # dict.fromkeys de-duplicates exactly like set() does (same __hash__/__eq__, first
        # occurrence kept) but preserves insertion order, so the result is deterministic.
        return list(dict.fromkeys(data_catalogs))

    def search_branch_by_internal_name(self, i_name: str) -> Tuple[Union[BranchLog, RunLog], Union[StepLog, None]]:
        """
        Given a branch internal name, search for it in the run log.

        If the branch internal name is empty, the branch is the run log itself.

        Args:
            i_name (str): The dot path of the branch, alternating step and branch names.

        Raises:
            exceptions.BranchLogNotFoundError: If any part of the dot path is missing from
                the run log, or if the path does not lead to a branch.

        Returns:
            Tuple[BranchLog, StepLog]: The branch and the last step log reached while
                walking the path; ``(self, None)`` for an empty name.
        """
        # The internal name is empty for the base dag.
        if not i_name:
            return self, None

        dot_path = i_name.split(".")

        # An internal name, when split against ".", reads step.branch.step.branch ...
        # Prefixes with an odd number of parts name a step, even ones name a branch.
        current_steps = self.steps
        current_step = None
        current_branch = None

        try:
            for depth in range(1, len(dot_path) + 1):
                key = ".".join(dot_path[:depth])
                if depth % 2:
                    # Odd number of parts: a step, looked up in the current collection.
                    current_step = current_steps[key]
                    logger.debug("Finding branch for %s in step: %s", i_name, current_step)
                else:
                    # Even number of parts: a branch of the step found just before.
                    current_branch = current_step.branches[key]  # type: ignore
                    current_steps = current_branch.steps
                    logger.debug("Finding branch %s in branch: %s", i_name, current_branch)
        except KeyError as err:
            # Report a missing step/branch with the documented exception, not a bare KeyError.
            raise exceptions.BranchLogNotFoundError(self.run_id, i_name) from err

        logger.debug("current branch : %s, current step %s", current_branch, current_step)
        if current_branch and current_step:
            return current_branch, current_step

        raise exceptions.BranchLogNotFoundError(self.run_id, i_name)

    def search_step_by_internal_name(self, i_name: str) -> Tuple[StepLog, Union[BranchLog, None]]:
        """
        Given a step's internal name, search for the step log in the run log.

        A name without any "." refers to a top level step of the run log.

        Args:
            i_name (str): The dot path of the step, alternating step and branch names.

        Raises:
            exceptions.StepLogNotFoundError: If any part of the dot path is missing from
                the run log, or if the path never enters a branch.

        Returns:
            Tuple[StepLog, BranchLog]: The step log and the branch holding it; the branch
                is None for a top level step.
        """
        dot_path = i_name.split(".")

        current_steps = self.steps
        current_step = None
        current_branch = None

        try:
            if len(dot_path) == 1:
                return self.steps[i_name], None

            for depth in range(1, len(dot_path) + 1):
                key = ".".join(dot_path[:depth])
                if depth % 2:
                    # Odd number of parts: a step, looked up in the current collection.
                    current_step = current_steps[key]
                    logger.debug("Finding step log for %s in step: %s", i_name, current_step)
                else:
                    # Even number of parts: a branch of the step found just before.
                    current_branch = current_step.branches[key]  # type: ignore
                    current_steps = current_branch.steps
                    logger.debug("Finding step log for %s in branch: %s", i_name, current_branch)
        except KeyError as err:
            # Report a missing step/branch with the documented exception, not a bare KeyError.
            raise exceptions.StepLogNotFoundError(self.run_id, i_name) from err

        logger.debug("current branch : %s, current step %s", current_branch, current_step)
        if current_branch and current_step:
            return current_step, current_branch

        raise exceptions.StepLogNotFoundError(self.run_id, i_name)
284
+
285
+
286
+ # All outside modules should interact with the data classes above through a RunLogStore, to promote extensibility.
287
+ # To customize this behaviour, extend BaseRunLogStore and implement the methods as per the specification.
288
+
289
+
290
class BaseRunLogStore(ABC, BaseModel):
    """
    Base class of a run log store, with the common methods already implemented.

    A concrete store provides ``create_run_log``, ``get_run_log_by_id`` and
    ``put_run_log``; every other method here is written in terms of those.
    """

    service_name: str = ""
    service_type: str = "run_log_store"

    @property
    def _context(self):
        """The ``run_context`` of the ``runnable.context`` module."""
        return context.run_context

    @abstractmethod
    def create_run_log(
        self,
        run_id: str,
        dag_hash: str = "",
        use_cached: bool = False,
        tag: str = "",
        original_run_id: str = "",
        status: str = defaults.CREATED,
        **kwargs,
    ):
        """
        Create a run log from the given attributes.

        An implementation is expected to:
            * Create a run log
            * Add it to the store
            * Return the run log

        Raises:
            NotImplementedError: The base class has no default implementation
        """
        raise NotImplementedError

    @abstractmethod
    def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog:
        """
        Retrieve the run log identified by run_id from the store.

        Args:
            run_id (str): The run_id of the run
            full (bool): return the full run log store or only the RunLog object

        Returns:
            RunLog: The RunLog object identified by the run_id

        Raises:
            NotImplementedError: The base class has no default implementation
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
        """
        raise NotImplementedError

    @abstractmethod
    def put_run_log(self, run_log: RunLog, **kwargs):
        """
        Write the run log to the store.

        Args:
            run_log (RunLog): The run log of the run

        Raises:
            NotImplementedError: The base class has no default implementation
        """
        raise NotImplementedError

    def update_run_log_status(self, run_id: str, status: str):
        """
        Set the status of the run log identified by run_id and write it back.

        Args:
            run_id (str): The run_id of the run
            status (str): The new status of the run
        """
        logger.info(f"Updating status of run_id {run_id} to {status}")
        stored = self.get_run_log_by_id(run_id, full=False)
        stored.status = status
        self.put_run_log(stored)

    def get_parameters(self, run_id: str, **kwargs) -> dict:
        """
        Return the parameters held by the run log identified by run_id.

        Args:
            run_id (str): The run_id of the run

        Returns:
            dict: The parameters of the run log

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
        """
        return self.get_run_log_by_id(run_id=run_id).parameters

    def set_parameters(self, run_id: str, parameters: dict, **kwargs):
        """
        Merge the given parameters into those of the run log and write it back.

        A parameter already present in the run log is overwritten by the new value.

        Args:
            run_id (str): The run_id of the run
            parameters (dict): The parameters to update in the run log

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
        """
        stored = self.get_run_log_by_id(run_id=run_id)
        stored.parameters.update(parameters)
        self.put_run_log(run_log=stored)

    def get_run_config(self, run_id: str, **kwargs) -> dict:
        """
        Return the run_config recorded in the run log identified by run_id.

        Args:
            run_id (str): The run_id of the run

        Returns:
            dict: The run config used for the run
        """
        return self.get_run_log_by_id(run_id=run_id).run_config

    def set_run_config(self, run_id: str, run_config: dict, **kwargs):
        """
        Merge the given run_config into that of the run log and write it back.

        Args:
            run_id (str): The run_id of the run
            run_config (dict): The run_config of the run
        """
        stored = self.get_run_log_by_id(run_id=run_id)
        stored.run_config.update(run_config)
        self.put_run_log(run_log=stored)

    def create_step_log(self, name: str, internal_name: str, **kwargs):
        """
        Build a step log from its name and internal name.

        Nothing is written to the store: the run log is not touched by this method, it
        only gives external modules a way to obtain a step log object.

        Args:
            name (str): The friendly name of the step log
            internal_name (str): The internal name of the step log, a dot path

        Returns:
            StepLog: An uncommitted step log object
        """
        logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
        return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)

    def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog:
        """
        Fetch the step log with the given internal name from the run log of run_id.

        The run log is retrieved with get_run_log_by_id and then searched using the
        internal name, which follows the dot path convention.

        Args:
            internal_name (str): The internal name of the step log
            run_id (str): The run_id of the run

        Returns:
            StepLog: The step log identified by internal_name within the run

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
            StepLogNotFoundError: If the step log for internal_name is not found in the datastore for run_id
        """
        logger.info(f"{self.service_name} Getting the step log: {internal_name} of {run_id}")
        stored = self.get_run_log_by_id(run_id=run_id)
        found_step, _ = stored.search_step_by_internal_name(internal_name)
        return found_step

    def add_step_log(self, step_log: StepLog, run_id: str, **kwargs):
        """
        Attach the step log to the run log of run_id and write the run log back.

        The step is added to the branch named by its internal name with the last dot
        path part removed; an empty remainder means the run log itself.

        Args:
            step_log (StepLog): The step log to add to the database
            run_id (str): The run id of the run

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
            BranchLogNotFoundError: If the branch of the step log for internal_name is not found in the datastore
                                    for run_id
        """
        logger.info(f"{self.service_name} Adding the step log to DB: {step_log.name}")
        stored = self.get_run_log_by_id(run_id=run_id)

        # Everything before the last "." names the branch that owns the step.
        owner_name = step_log.internal_name.rpartition(".")[0]
        owner, _ = stored.search_branch_by_internal_name(owner_name)

        if owner is None:
            owner = stored

        owner.steps[step_log.internal_name] = step_log
        self.put_run_log(run_log=stored)

    def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog:
        """
        Build a branch log with the given internal name; nothing is written to the store.

        Args:
            internal_branch_name (str): The internal name of the branch log to create

        Returns:
            BranchLog: An uncommitted branch log, initialized with defaults
        """
        logger.info(f"{self.service_name} Creating a Branch Log : {internal_branch_name}")
        return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)

    def get_branch_log(self, internal_branch_name: str, run_id: str, **kwargs) -> Union[BranchLog, RunLog]:
        """
        Return the branch log with the given internal name from the run log of run_id.

        An empty internal branch name yields the run log itself.

        Args:
            internal_branch_name (str): The internal branch name to retrieve.
            run_id (str): The run id of interest

        Returns:
            BranchLog: The branch log or the run log as requested.
        """
        stored = self.get_run_log_by_id(run_id=run_id)

        if internal_branch_name:
            found_branch, _ = stored.search_branch_by_internal_name(internal_branch_name)
            return found_branch

        return stored

    def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs):
        """
        Attach the branch log to the step that owns it and write the run log back.

        The branch log can at times be a run log; it is then written to the store as it
        is. The same happens for a branch log whose internal name is empty.

        Args:
            branch_log (BranchLog): The branch log/run log to add to the database
            run_id (str): The run id to which the branch/run log is added
        """
        internal_branch_name = branch_log.internal_name if isinstance(branch_log, BranchLog) else None

        if not internal_branch_name:
            self.put_run_log(branch_log)  # type: ignore # We are dealing with base dag here
            return

        stored = self.get_run_log_by_id(run_id=run_id)

        # Everything before the last "." names the step that owns the branch.
        owner_name = internal_branch_name.rpartition(".")[0]
        owner, _ = stored.search_step_by_internal_name(owner_name)

        owner.branches[internal_branch_name] = branch_log  # type: ignore
        self.put_run_log(stored)

    def create_attempt_log(self, **kwargs) -> StepAttempt:
        """
        Build a step attempt log holding only defaults; nothing is written to the store.

        Returns:
            StepAttempt: An uncommitted step attempt log
        """
        logger.info(f"{self.service_name} Creating an attempt log")
        return StepAttempt()

    def create_code_identity(self, **kwargs) -> CodeIdentity:
        """
        Build a code identity holding only defaults; nothing is written to the store.

        Returns:
            CodeIdentity: An uncommitted code identity
        """
        logger.info(f"{self.service_name} Creating Code identity")
        return CodeIdentity()

    def create_data_catalog(self, name: str, **kwargs) -> DataCatalog:
        """
        Build a data catalog item with the given name; nothing is written to the store.

        Args:
            name (str): The name of the data catalog item

        Returns:
            DataCatalog: An uncommitted data catalog item
        """
        logger.info(f"{self.service_name} Creating Data Catalog for {name}")
        return DataCatalog(name=name)
615
+
616
+
617
class BufferRunLogstore(BaseRunLogStore):
    """
    A run log store that keeps the run log in memory.

    Nothing is persisted: the single run log lives in the ``run_log`` attribute.

    When to use:
        When testing some part of the pipeline.

    Do not use:
        When you need to compare between runs or in a production set up.

    NOTE(review): the previous documentation described this store as concurrent write
    safe because it is in memory; nothing in this class enforces that - confirm.

    Example config:
        run_log:
          type: buffered
    """

    service_name: str = "buffered"
    # For a buffered run log store this attribute is the database; it is excluded from serialization.
    run_log: Optional[RunLog] = Field(default=None, exclude=True)

    def create_run_log(
        self,
        run_id: str,
        dag_hash: str = "",
        use_cached: bool = False,
        tag: str = "",
        original_run_id: str = "",
        status: str = defaults.CREATED,
        **kwargs,
    ) -> RunLog:
        """
        Create a run log, keep it as the in-memory database and return it.

        Any run log held before is replaced. Refer to BaseRunLogStore.create_run_log.
        """
        logger.info(f"{self.service_name} Creating a Run Log and adding it to DB")
        created = RunLog(
            run_id=run_id,
            dag_hash=dag_hash,
            use_cached=use_cached,
            tag=tag,
            original_run_id=original_run_id,
            status=status,
        )
        self.run_log = created
        return self.run_log

    def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs):
        """
        Return the run log held in memory.

        The run_id is only used for logging and for the error raised when no run log
        is held; it is not compared with the run_id of the held run log.

        Raises:
            exceptions.RunLogNotFoundError: If no run log is held.
        """
        logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
        if not self.run_log:
            raise exceptions.RunLogNotFoundError(run_id)

        return self.run_log

    def put_run_log(self, run_log: RunLog, **kwargs):
        """
        Replace the run log held in memory with the given one.
        """
        logger.info(f"{self.service_name} Putting the run log in the DB: {run_log.run_id}")
        self.run_log = run_log