runnable 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. runnable/__init__.py +34 -0
  2. runnable/catalog.py +141 -0
  3. runnable/cli.py +272 -0
  4. runnable/context.py +34 -0
  5. runnable/datastore.py +686 -0
  6. runnable/defaults.py +179 -0
  7. runnable/entrypoints.py +484 -0
  8. runnable/exceptions.py +94 -0
  9. runnable/executor.py +431 -0
  10. runnable/experiment_tracker.py +139 -0
  11. runnable/extensions/catalog/__init__.py +21 -0
  12. runnable/extensions/catalog/file_system/__init__.py +0 -0
  13. runnable/extensions/catalog/file_system/implementation.py +226 -0
  14. runnable/extensions/catalog/k8s_pvc/__init__.py +0 -0
  15. runnable/extensions/catalog/k8s_pvc/implementation.py +16 -0
  16. runnable/extensions/catalog/k8s_pvc/integration.py +59 -0
  17. runnable/extensions/executor/__init__.py +714 -0
  18. runnable/extensions/executor/argo/__init__.py +0 -0
  19. runnable/extensions/executor/argo/implementation.py +1182 -0
  20. runnable/extensions/executor/argo/specification.yaml +51 -0
  21. runnable/extensions/executor/k8s_job/__init__.py +0 -0
  22. runnable/extensions/executor/k8s_job/implementation_FF.py +259 -0
  23. runnable/extensions/executor/k8s_job/integration_FF.py +69 -0
  24. runnable/extensions/executor/local/__init__.py +0 -0
  25. runnable/extensions/executor/local/implementation.py +69 -0
  26. runnable/extensions/executor/local_container/__init__.py +0 -0
  27. runnable/extensions/executor/local_container/implementation.py +367 -0
  28. runnable/extensions/executor/mocked/__init__.py +0 -0
  29. runnable/extensions/executor/mocked/implementation.py +220 -0
  30. runnable/extensions/experiment_tracker/__init__.py +0 -0
  31. runnable/extensions/experiment_tracker/mlflow/__init__.py +0 -0
  32. runnable/extensions/experiment_tracker/mlflow/implementation.py +94 -0
  33. runnable/extensions/nodes.py +675 -0
  34. runnable/extensions/run_log_store/__init__.py +0 -0
  35. runnable/extensions/run_log_store/chunked_file_system/__init__.py +0 -0
  36. runnable/extensions/run_log_store/chunked_file_system/implementation.py +106 -0
  37. runnable/extensions/run_log_store/chunked_k8s_pvc/__init__.py +0 -0
  38. runnable/extensions/run_log_store/chunked_k8s_pvc/implementation.py +21 -0
  39. runnable/extensions/run_log_store/chunked_k8s_pvc/integration.py +61 -0
  40. runnable/extensions/run_log_store/db/implementation_FF.py +157 -0
  41. runnable/extensions/run_log_store/db/integration_FF.py +0 -0
  42. runnable/extensions/run_log_store/file_system/__init__.py +0 -0
  43. runnable/extensions/run_log_store/file_system/implementation.py +136 -0
  44. runnable/extensions/run_log_store/generic_chunked.py +541 -0
  45. runnable/extensions/run_log_store/k8s_pvc/__init__.py +0 -0
  46. runnable/extensions/run_log_store/k8s_pvc/implementation.py +21 -0
  47. runnable/extensions/run_log_store/k8s_pvc/integration.py +56 -0
  48. runnable/extensions/secrets/__init__.py +0 -0
  49. runnable/extensions/secrets/dotenv/__init__.py +0 -0
  50. runnable/extensions/secrets/dotenv/implementation.py +100 -0
  51. runnable/extensions/secrets/env_secrets/__init__.py +0 -0
  52. runnable/extensions/secrets/env_secrets/implementation.py +42 -0
  53. runnable/graph.py +464 -0
  54. runnable/integration.py +205 -0
  55. runnable/interaction.py +399 -0
  56. runnable/names.py +546 -0
  57. runnable/nodes.py +489 -0
  58. runnable/parameters.py +183 -0
  59. runnable/pickler.py +102 -0
  60. runnable/sdk.py +470 -0
  61. runnable/secrets.py +95 -0
  62. runnable/tasks.py +392 -0
  63. runnable/utils.py +630 -0
  64. runnable-0.2.0.dist-info/METADATA +437 -0
  65. runnable-0.2.0.dist-info/RECORD +69 -0
  66. runnable-0.2.0.dist-info/entry_points.txt +44 -0
  67. runnable-0.1.0.dist-info/METADATA +0 -16
  68. runnable-0.1.0.dist-info/RECORD +0 -6
  69. /runnable/{.gitkeep → extensions/__init__.py} +0 -0
  70. {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/LICENSE +0 -0
  71. {runnable-0.1.0.dist-info → runnable-0.2.0.dist-info}/WHEEL +0 -0
runnable/datastore.py ADDED
@@ -0,0 +1,686 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Dict, List, Optional, OrderedDict, Tuple, Union
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+ import runnable.context as context
10
+ from runnable import defaults, exceptions
11
+
12
+ logger = logging.getLogger(defaults.LOGGER_NAME)
13
+
14
+ # Once defined these classes are sealed to any additions unless a default is provided
15
+ # Breaking this rule might make runnable backward incompatible
16
+
17
+
18
class DataCatalog(BaseModel, extra="allow"):
    """
    Record describing a single item tracked in the catalog.

    Identity is defined by ``name`` alone: two records with the same name hash
    and compare equal whatever their other attributes hold, so a set keeps a
    single record per name.
    """

    name: str  # The name of the dataset
    data_hash: str = ""  # The sha1 hash of the file
    catalog_relative_path: str = ""  # The file path relative to the catalog location
    catalog_handler_location: str = ""  # The location of the catalog
    stage: str = ""  # The stage at which it was recorded: get, put etc

    def __hash__(self):
        """
        Hash on the name only, which lets DataCatalog objects be de-duplicated in sets.
        """
        return hash(self.name)

    def __eq__(self, other):
        """
        Equal only to another DataCatalog that carries the same name.
        """
        return isinstance(other, DataCatalog) and other.name == self.name
43
+
44
+
45
class StepAttempt(BaseModel):
    """
    The captured attributes of an Attempt of a step.

    Every field carries a default, so an attempt can be created with no arguments.
    """

    attempt_number: int = 0
    start_time: str = ""
    end_time: str = ""
    duration: str = ""  # end_time - start_time
    status: str = "FAIL"  # An attempt reads as failed until the status is overwritten
    message: str = ""
    parameters: Dict[str, Any] = Field(default_factory=dict)  # A fresh dict per instance
57
+
58
+
59
class CodeIdentity(BaseModel, extra="allow"):
    """
    The captured attributes of a code identity of a step.

    All fields are optional with defaults, so an empty identity can be created
    with no arguments. Attributes beyond the declared ones are accepted
    (extra="allow").
    """

    code_identifier: Optional[str] = ""  # GIT sha code or docker image id
    code_identifier_type: Optional[str] = ""  # git or docker
    code_identifier_dependable: Optional[bool] = False  # If git, checks if the tree is clean.
    code_identifier_url: Optional[str] = ""  # The git remote url or docker repository url
    code_identifier_message: Optional[str] = ""  # Any optional message
69
+
70
+
71
class StepLog(BaseModel):
    """
    Everything recorded about one step of a run.

    A step may own branches (keyed by the branch internal name); each branch in
    turn holds its own step logs.
    """

    name: str
    internal_name: str  # Should be the dot notation of the step
    status: str = "FAIL"  # Should have a better default
    step_type: str = "task"
    message: str = ""
    mock: bool = False
    code_identities: List[CodeIdentity] = Field(default_factory=list)
    attempts: List[StepAttempt] = Field(default_factory=list)
    user_defined_metrics: Dict[str, Any] = Field(default_factory=dict)
    branches: Dict[str, BranchLog] = Field(default_factory=dict)
    data_catalog: List[DataCatalog] = Field(default_factory=list)

    def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
        """
        Collect the catalog entries recorded at the given stage.

        The step's own matching entries come first, followed by the matching
        entries of every branch of the step.

        Args:
            stage (str, optional): Either 'get' or 'put'. Defaults to 'put'.

        Raises:
            Exception: If the stage is neither 'get' nor 'put'.

        Returns:
            List[DataCatalog]: The catalog entries recorded at the stage.
        """
        if stage not in ("get", "put"):
            raise Exception("Stage should be in get or put")

        from_branches: List[DataCatalog] = []
        for branch in self.branches.values():
            from_branches.extend(branch.get_data_catalogs_by_stage(stage=stage))

        own_entries = [entry for entry in self.data_catalog if entry.stage == stage]
        return own_entries + from_branches

    def add_data_catalogs(self, data_catalogs: List[DataCatalog]):
        """
        Append the given catalog entries to the entries of this step.

        Args:
            data_catalogs ([DataCatalog]): A list of data catalog items
        """
        if not self.data_catalog:
            # Start from a fresh list when nothing has been recorded yet.
            self.data_catalog = []
        self.data_catalog.extend(data_catalogs)
123
+
124
+
125
class BranchLog(BaseModel):
    """
    The captured data about one branch of a composite node.

    A branch is an ordered mapping of step logs keyed by their internal name.
    """

    internal_name: str
    status: str = "FAIL"
    steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)

    def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
        """
        Collect the catalog entries recorded at the given stage by the steps of this branch.

        Args:
            stage (str, optional): Either 'get' or 'put'. Defaults to 'put'.

        Raises:
            Exception: If the stage is neither 'get' nor 'put'.

        Returns:
            List[DataCatalog]: The catalog entries of every step, in step order.
        """
        if stage not in ("get", "put"):
            raise Exception("Stage should be in get or put")

        collected: List[DataCatalog] = []
        for step_log in self.steps.values():
            collected.extend(step_log.get_data_catalogs_by_stage(stage=stage))
        return collected
158
+
159
+
160
# StepLog.branches is annotated with BranchLog, which is only defined after StepLog.
# Rebuild the StepLog model now that BranchLog exists so the reference can be resolved.
StepLog.model_rebuild()
162
+
163
+
164
class RunLog(BaseModel):
    """
    The data captured as part of Run Log

    The run log is the root of the hierarchy: it holds the top level step logs,
    which may hold branches, which hold further step logs. Internal names follow
    the dot path ``step.branch.step.branch...`` and every level is keyed by the
    full dot path up to that level.
    """

    run_id: str
    dag_hash: Optional[str] = None
    use_cached: bool = False
    tag: Optional[str] = ""
    original_run_id: Optional[str] = ""
    status: str = defaults.FAIL
    steps: OrderedDict[str, StepLog] = Field(default_factory=OrderedDict)
    parameters: Dict[str, Any] = Field(default_factory=dict)
    run_config: Dict[str, Any] = Field(default_factory=dict)

    def get_data_catalogs_by_stage(self, stage: str = "put") -> List[DataCatalog]:
        """
        Return all the cataloged data by the stage at which they were cataloged.

        Entries are de-duplicated by name (DataCatalog hashes and compares on
        its name); the first entry seen for a name is kept and the order in
        which entries were encountered is preserved.

        Args:
            stage (str, optional): Either 'get' or 'put'. Defaults to 'put'.

        Raises:
            Exception: If stage was not either put or get.

        Returns:
            List[DataCatalog]: The unique catalog entries recorded at the stage.
        """
        if stage not in ["get", "put"]:
            raise Exception("Only get or put are allowed in stage")

        data_catalogs: List[DataCatalog] = []
        for step in self.steps.values():
            data_catalogs.extend(step.get_data_catalogs_by_stage(stage=stage))

        # A dict keeps insertion order, unlike a set whose iteration order is arbitrary,
        # so the result is stable between calls and between interpreter runs.
        return list(dict.fromkeys(data_catalogs))

    def search_branch_by_internal_name(self, i_name: str) -> Tuple[Union[BranchLog, RunLog], Union[StepLog, None]]:
        """
        Given a branch internal name, search for it in the run log.

        If the branch internal name is empty, it is the run log itself.

        Args:
            i_name (str): The dot path internal name of the branch.

        Raises:
            exceptions.BranchLogNotFoundError: If any step or branch along the dot path
                is missing, or if the dot path does not end at a branch.

        Returns:
            Tuple[BranchLog, StepLog]: The branch and the step that holds it,
                or (run log, None) for an empty internal name.
        """
        # internal name is null for base dag
        if not i_name:
            return self, None

        dot_path = i_name.split(".")

        # any internal name of a branch when split against .
        # goes step.branch.step.branch
        # A prefix of odd length names a step, a prefix of even length names a branch
        current_steps = self.steps
        current_step = None
        current_branch = None

        try:
            for depth in range(1, len(dot_path) + 1):
                key = ".".join(dot_path[:depth])
                if depth % 2 == 0:
                    # Even depth: a branch held by the step found at the previous depth
                    current_branch = current_step.branches[key]  # type: ignore
                    current_steps = current_branch.steps
                    logger.debug(f"Finding branch {i_name} in branch: {current_branch}")
                else:
                    # Odd depth: a step, we start here!
                    current_step = current_steps[key]
                    logger.debug(f"Finding branch for {i_name} in step: {current_step}")
        except KeyError as e:
            # Report a missing level as the documented error instead of a bare KeyError
            raise exceptions.BranchLogNotFoundError(self.run_id, i_name) from e

        logger.debug(f"current branch : {current_branch}, current step {current_step}")
        if current_branch and current_step:
            return current_branch, current_step

        raise exceptions.BranchLogNotFoundError(self.run_id, i_name)

    def search_step_by_internal_name(self, i_name: str) -> Tuple[StepLog, Union[BranchLog, None]]:
        """
        Given a steps internal name, search for the step name.

        If the internal name has a single dot path element, the step is looked up
        directly in the run log and no branch is returned.

        Args:
            i_name (str): The dot path internal name of the step.

        Raises:
            exceptions.StepLogNotFoundError: If any step or branch along the dot path
                is missing, or if no branch was traversed for a nested name.

        Returns:
            Tuple[StepLog, BranchLog]: The step and the branch that holds it,
                or (step, None) for a top level step.
        """
        dot_path = i_name.split(".")

        try:
            if len(dot_path) == 1:
                return self.steps[i_name], None

            current_steps = self.steps
            current_step = None
            current_branch = None
            for depth in range(1, len(dot_path) + 1):
                key = ".".join(dot_path[:depth])
                if depth % 2 == 0:
                    # Even depth: a branch name
                    current_branch = current_step.branches[key]  # type: ignore
                    current_steps = current_branch.steps
                    logger.debug(f"Finding step log for {i_name} in branch: {current_branch}")
                else:
                    # Odd depth: a step, we start here!
                    current_step = current_steps[key]
                    logger.debug(f"Finding step log for {i_name} in step: {current_step}")
        except KeyError as e:
            # Report a missing level as the documented error instead of a bare KeyError
            raise exceptions.StepLogNotFoundError(self.run_id, i_name) from e

        logger.debug(f"current branch : {current_branch}, current step {current_step}")
        if current_branch and current_step:
            return current_step, current_branch

        raise exceptions.StepLogNotFoundError(self.run_id, i_name)
283
+
284
+
285
+ # All outside modules should interact with dataclasses using the RunLogStore to promote extensibility
286
+ # If you want to customize dataclass, extend BaseRunLogStore and implement the methods as per the specification
287
+
288
+
289
class BaseRunLogStore(ABC, BaseModel):
    """
    The base class of a Run Log Store with many common methods implemented.

    Concrete stores implement create_run_log, get_run_log_by_id and put_run_log.
    The remaining methods are written in terms of those: they fetch the run log,
    modify it in memory and put it back.
    """

    service_name: str = ""
    service_type: str = "run_log_store"

    @property
    def _context(self):
        # The run context held by the runnable.context module
        return context.run_context

    @abstractmethod
    def create_run_log(
        self,
        run_id: str,
        dag_hash: str = "",
        use_cached: bool = False,
        tag: str = "",
        original_run_id: str = "",
        status: str = defaults.CREATED,
        **kwargs,
    ):
        """
        Create a Run Log for the run_id, store it and return it.

        Implementations are expected to:
            * Create a Run log
            * Add it to the store
            * Return the log

        Raises:
            NotImplementedError: This is a base class and therefore has no default implementation
        """
        raise NotImplementedError

    @abstractmethod
    def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs) -> RunLog:
        """
        Retrieve the Run log identified by the run_id from the store.

        Args:
            run_id (str): The run_id of the run
            full (bool): return the full run log store or only the RunLog object

        Returns:
            RunLog: The RunLog object identified by the run_id

        Raises:
            NotImplementedError: This is a base class and therefore has no default implementation
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
        """
        raise NotImplementedError

    @abstractmethod
    def put_run_log(self, run_log: RunLog, **kwargs):
        """
        Put the Run Log into the store.

        Args:
            run_log (RunLog): The Run log of the run

        Raises:
            NotImplementedError: This is a base class and therefore has no default implementation
        """
        raise NotImplementedError

    def update_run_log_status(self, run_id: str, status: str):
        """
        Set the status of the Run Log identified by run_id and store it.

        Args:
            run_id (str): The run_id of the run
            status (str): The new status of the run
        """
        logger.info(f"Updating status of run_id {run_id} to {status}")
        stored_log = self.get_run_log_by_id(run_id, full=False)
        stored_log.status = status
        self.put_run_log(stored_log)

    def get_parameters(self, run_id: str, **kwargs) -> dict:
        """
        Return the parameters recorded in the Run log of run_id.

        Args:
            run_id (str): The run_id of the run

        Returns:
            dict: A dictionary of the run_log parameters

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
        """
        return self.get_run_log_by_id(run_id=run_id).parameters

    def set_parameters(self, run_id: str, parameters: dict, **kwargs):
        """
        Merge the given parameters into the parameters of the Run log and store it.

        Parameters already present in the run log are over-written by the new values.

        Args:
            run_id (str): The run_id of the run
            parameters (dict): The parameters to update in the run log

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
        """
        stored_log = self.get_run_log_by_id(run_id=run_id)
        stored_log.parameters.update(parameters)
        self.put_run_log(run_log=stored_log)

    def get_run_config(self, run_id: str, **kwargs) -> dict:
        """
        Given a run_id, return the run_config recorded in its run log.

        Args:
            run_id (str): The run_id of the run

        Returns:
            dict: The run config used for the run
        """
        return self.get_run_log_by_id(run_id=run_id).run_config

    def set_run_config(self, run_id: str, run_config: dict, **kwargs):
        """
        Merge the given run_config into the run config of the Run log and store it.

        Args:
            run_id (str): The run_id of the run
            run_config (dict): The run_config of the run
        """
        stored_log = self.get_run_log_by_id(run_id=run_id)
        stored_log.run_config.update(run_config)
        self.put_run_log(run_log=stored_log)

    def create_step_log(self, name: str, internal_name: str, **kwargs):
        """
        Create a step log by the name and internal name.

        The step log is only created here; it is not added to any Run Log.

        Args:
            name (str): The friendly name of the step log
            internal_name (str): The internal naming of the step log. The internal naming is a dot path convention

        Returns:
            StepLog: A uncommitted step log object
        """
        logger.info(f"{self.service_name} Creating a Step Log: {internal_name}")
        return StepLog(name=name, internal_name=internal_name, status=defaults.CREATED)

    def get_step_log(self, internal_name: str, run_id: str, **kwargs) -> StepLog:
        """
        Return the step log of run_id identified by its dot path internal name.

        Args:
            internal_name (str): The internal name of the step log
            run_id (str): The run_id of the run

        Returns:
            StepLog: The step log found by RunLog.search_step_by_internal_name

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
            StepLogNotFoundError: If the step log for internal_name is not found in the datastore for run_id
        """
        logger.info(f"{self.service_name} Getting the step log: {internal_name} of {run_id}")
        stored_log = self.get_run_log_by_id(run_id=run_id)
        found_step, _ = stored_log.search_step_by_internal_name(internal_name)
        return found_step

    def add_step_log(self, step_log: StepLog, run_id: str, **kwargs):
        """
        Add the step log to the run log of run_id and store the run log.

        The parent of the step (a branch, or the run log itself for a top level
        step) is the internal name of the step without its last dot path element.

        Args:
            step_log (StepLog): The Step log to add to the database
            run_id (str): The run id of the run

        Raises:
            RunLogNotFoundError: If the run log for run_id is not found in the datastore
            BranchLogNotFoundError: If the branch of the step log for internal_name is not found in the datastore
                                    for run_id
        """
        logger.info(f"{self.service_name} Adding the step log to DB: {step_log.name}")
        stored_log = self.get_run_log_by_id(run_id=run_id)

        # Everything before the last "." is the name of the parent; "" for a top level step
        parent_name = step_log.internal_name.rpartition(".")[0]
        parent, _ = stored_log.search_branch_by_internal_name(parent_name)

        if parent is None:
            parent = stored_log
        parent.steps[step_log.internal_name] = step_log
        self.put_run_log(run_log=stored_log)

    def create_branch_log(self, internal_branch_name: str, **kwargs) -> BranchLog:
        """
        Create an uncommitted branch log object by the internal name given.

        Args:
            internal_branch_name (str): The internal name of the branch log to create

        Returns:
            BranchLog: Uncommitted and initialized with defaults BranchLog object
        """
        logger.info(f"{self.service_name} Creating a Branch Log : {internal_branch_name}")
        return BranchLog(internal_name=internal_branch_name, status=defaults.CREATED)

    def get_branch_log(self, internal_branch_name: str, run_id: str, **kwargs) -> Union[BranchLog, RunLog]:
        """
        Return the branch log by the internal branch name for the run id.

        If the internal branch name is empty, the run log itself is returned.

        Args:
            internal_branch_name (str): The internal branch name to retrieve.
            run_id (str): The run id of interest

        Returns:
            BranchLog: The branch log or the run log as requested.
        """
        stored_log = self.get_run_log_by_id(run_id=run_id)
        if internal_branch_name:
            found_branch, _ = stored_log.search_branch_by_internal_name(internal_branch_name)
            return found_branch
        return stored_log

    def add_branch_log(self, branch_log: Union[BranchLog, RunLog], run_id: str, **kwargs):
        """
        Add the branch log to the step that holds it and store the run log.

        When the given log is a Run log (or a branch log without an internal
        name) it is put into the store as the run log itself.

        Args:
            branch_log (BranchLog): The branch log/run log to add to the database
            run_id (str): The run id to which the branch/run log is added
        """
        internal_branch_name = branch_log.internal_name if isinstance(branch_log, BranchLog) else None

        if not internal_branch_name:
            self.put_run_log(branch_log)  # type: ignore # We are dealing with base dag here
            return

        stored_log = self.get_run_log_by_id(run_id=run_id)

        # The step holding the branch is the branch name without its last dot path element
        holder_name = internal_branch_name.rpartition(".")[0]
        holder, _ = stored_log.search_step_by_internal_name(holder_name)

        holder.branches[internal_branch_name] = branch_log  # type: ignore
        self.put_run_log(stored_log)

    def create_attempt_log(self, **kwargs) -> StepAttempt:
        """
        Return an uncommitted step attempt log initialised with its defaults.

        Returns:
            StepAttempt: An uncommitted step attempt log
        """
        logger.info(f"{self.service_name} Creating an attempt log")
        return StepAttempt()

    def create_code_identity(self, **kwargs) -> CodeIdentity:
        """
        Return an uncommitted code identity initialised with its defaults.

        Returns:
            CodeIdentity: An uncommitted code identity class
        """
        logger.info(f"{self.service_name} Creating Code identity")
        return CodeIdentity()

    def create_data_catalog(self, name: str, **kwargs) -> DataCatalog:
        """
        Create an uncommitted data catalog object.

        Args:
            name (str): The name of the data catalog item to put

        Returns:
            DataCatalog: The DataCatalog object.
        """
        logger.info(f"{self.service_name} Creating Data Catalog for {name}")
        return DataCatalog(name=name)
614
+
615
+
616
class BufferRunLogstore(BaseRunLogStore):
    """
    A in-memory run log store.

    The run log lives only in the ``run_log`` attribute of this object, so
    nothing is persisted.

    When to use:
        When testing some part of the pipeline.

    Do not use:
        When you need to compare between runs or in production set up

    NOTE(review): the original documentation describes this store as concurrent
    write safe because it is in memory; nothing in this class enforces that.

    Example config:
        run_log:
          type: buffered

    """

    service_name: str = "buffered"
    run_log: Optional[RunLog] = Field(default=None, exclude=True)  # For a buffered Run Log, this is the database

    def create_run_log(
        self,
        run_id: str,
        dag_hash: str = "",
        use_cached: bool = False,
        tag: str = "",
        original_run_id: str = "",
        status: str = defaults.CREATED,
        **kwargs,
    ) -> RunLog:
        """
        Create a Run log, keep it as the in-memory run log and return it.

        Refer to BaseRunLogStore.create_run_log
        """
        logger.info(f"{self.service_name} Creating a Run Log and adding it to DB")
        created = RunLog(
            run_id=run_id,
            dag_hash=dag_hash,
            use_cached=use_cached,
            tag=tag,
            original_run_id=original_run_id,
            status=status,
        )
        self.run_log = created
        return self.run_log

    def get_run_log_by_id(self, run_id: str, full: bool = False, **kwargs):
        """
        Return the in-memory run log.

        The run_id is only used for logging and for the error; the held run log
        is returned whatever its run_id is.

        Raises:
            exceptions.RunLogNotFoundError: If no run log is held.
        """
        logger.info(f"{self.service_name} Getting the run log from DB for {run_id}")
        if not self.run_log:
            raise exceptions.RunLogNotFoundError(run_id)
        return self.run_log

    def put_run_log(self, run_log: RunLog, **kwargs):
        """
        Replace the in-memory run log with the given run log.
        """
        logger.info(f"{self.service_name} Putting the run log in the DB: {run_log.run_id}")
        self.run_log = run_log