runnable 0.50.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72):
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,549 @@
1
+ import json
2
+ import logging
3
+ from abc import abstractmethod
4
+ from enum import Enum
5
+ from typing import Any, Dict, Union
6
+
7
+ from runnable import defaults, exceptions
8
+ from runnable.datastore import (
9
+ BaseRunLogStore,
10
+ BranchLog,
11
+ RunLog,
12
+ StepLog,
13
+ )
14
+
15
+ logger = logging.getLogger(defaults.LOGGER_NAME)
16
+
17
+
18
class ChunkedRunLogStore(BaseRunLogStore):
    """
    Generic run log store that persists a run as several separate chunks.

    The root ``RunLog`` and each ``BranchLog`` are stored under their own
    file names. The persistence primitives are declared as abstract
    methods on this class.
    """

    # Name of the concrete service; empty for this generic base.
    service_name: str = ""
    # Flag advertising that this store supports parallel writes.
    supports_parallel_writes: bool = True

    class LogTypes(Enum):
        """Kinds of log handled by the store; values are file-name prefixes."""

        RUN_LOG = "RunLog"
        BRANCH_LOG = "BranchLog"

    class ModelTypes(Enum):
        """Model class per log kind, keyed by the same names as ``LogTypes``."""

        RUN_LOG = RunLog
        BRANCH_LOG = BranchLog
33
+
34
def get_file_name(self, log_type: LogTypes, name: str = "") -> str:
    """
    Build the storage file name for a log of the given type.

    Args:
        log_type (LogTypes): Either RUN_LOG or BRANCH_LOG
        name (str, optional): The internal_branch_name; required for BRANCH_LOG. Defaults to ''.

    Raises:
        Exception: If log_type is not recognized or name is missing for BRANCH_LOG

    Returns:
        str: "RunLog" for the run log, "BranchLog-<name>" for a branch log
    """
    kinds = self.LogTypes

    if log_type == kinds.BRANCH_LOG:
        if name:
            return f"{kinds.BRANCH_LOG.value}-{name}"
        raise Exception("Name (internal_branch_name) required for BRANCH_LOG")

    if log_type == kinds.RUN_LOG:
        # The run log is unique per run, so ``name`` plays no part here.
        return kinds.RUN_LOG.value

    raise Exception(f"Unexpected log type: {log_type}")
57
+
58
+ @abstractmethod
59
+ def _exists(self, run_id: str, name: str) -> bool:
60
+ """
61
+ Check if a file exists in the persistence layer.
62
+
63
+ Args:
64
+ run_id (str): The run id
65
+ name (str): The exact file name to check
66
+
67
+ Returns:
68
+ bool: True if file exists, False otherwise
69
+ """
70
+ ...
71
+
72
+ @abstractmethod
73
+ def _list_branch_logs(self, run_id: str) -> list[str]:
74
+ """
75
+ List all branch log file names for a run_id.
76
+
77
+ Args:
78
+ run_id (str): The run id
79
+
80
+ Returns:
81
+ list[str]: List of branch log file names (e.g., ["BranchLog-map.1", "BranchLog-map.2"])
82
+ """
83
+ ...
84
+
85
+ @abstractmethod
86
+ def _store(self, run_id: str, contents: dict, name: str, insert: bool = False):
87
+ """
88
+ Store the contents against the name in the persistence layer.
89
+
90
+ Args:
91
+ run_id (str): The run id
92
+ contents (dict): The dict to store
93
+ name (str): The name to store as
94
+ """
95
+ ...
96
+
97
+ @abstractmethod
98
+ def _retrieve(self, run_id: str, name: str) -> dict:
99
+ """
100
+ Does the job of retrieving from the persistent layer.
101
+
102
+ Args:
103
+ name (str): the name of the file to retrieve
104
+
105
+ Returns:
106
+ dict: The contents
107
+ """
108
+ ...
109
+
110
def store(self, run_id: str, log_type: LogTypes, contents: dict, name: str = ""):
    """Persist a log, merging it into any copy that is already stored.

    Args:
        run_id (str): The run id to store against
        log_type (LogTypes): The type of log to store (RUN_LOG or BRANCH_LOG)
        contents (dict): The dict of contents to store
        name (str, optional): The internal_branch_name for BRANCH_LOG. Defaults to ''.
    """
    target = self.get_file_name(log_type=log_type, name=name)
    already_stored = self._exists(run_id=run_id, name=target)

    if already_stored:
        # Update path: incoming top-level keys win, stored keys that are
        # absent from ``contents`` are carried over unchanged.
        previous = self._retrieve(run_id=run_id, name=target)
        contents = dict(previous, **contents)

    self._store(
        run_id=run_id,
        contents=contents,
        name=target,
        insert=not already_stored,
    )
130
+
131
def retrieve(self, run_id: str, log_type: LogTypes, name: str = "") -> Any:
    """
    Load a stored log and validate it into its model class.

    Args:
        run_id (str): The run id
        log_type (LogTypes): Either RUN_LOG or BRANCH_LOG
        name (str, optional): The internal_branch_name for BRANCH_LOG. Defaults to ''.

    Raises:
        Exception: If name is missing for BRANCH_LOG
        EntityNotFoundError: If the file is not found

    Returns:
        Union[RunLog, BranchLog]: The requested log object
    """
    is_branch = log_type == self.LogTypes.BRANCH_LOG
    if is_branch and not name:
        raise Exception("Name (internal_branch_name) required for BRANCH_LOG")

    target = self.get_file_name(log_type=log_type, name=name)
    if not self._exists(run_id=run_id, name=target):
        raise exceptions.EntityNotFoundError()

    raw = self._retrieve(run_id=run_id, name=target)
    # ModelTypes shares member names with LogTypes, so the member name
    # selects the model class to validate against.
    model = self.ModelTypes[log_type.name].value
    return model.model_validate(raw)
158
+
159
+ def _get_parent_branch(self, name: str) -> Union[str, None]:
160
+ """
161
+ Returns the name of the parent branch.
162
+ If the step is part of main dag, return None.
163
+
164
+ Args:
165
+ name (str): The name of the step.
166
+
167
+ Returns:
168
+ str: The name of the branch containing the step.
169
+ """
170
+ dot_path = name.split(".")
171
+
172
+ if len(dot_path) == 1:
173
+ return None
174
+ # Ignore the step name
175
+ return ".".join(dot_path[:-1])
176
+
177
+ def _get_parent_step(self, name: str) -> Union[str, None]:
178
+ """
179
+ Returns the step containing the step, useful when we have steps within a branch.
180
+ Returns None, if the step belongs to parent dag.
181
+
182
+ Args:
183
+ name (str): The name of the step to find the parent step it belongs to.
184
+
185
+ Returns:
186
+ str: The parent step the step belongs to, None if the step belongs to parent dag.
187
+ """
188
+ dot_path = name.split(".")
189
+
190
+ if len(dot_path) == 1:
191
+ return None
192
+ # Ignore the branch.step_name
193
+ return ".".join(dot_path[:-2])
194
+
195
def _prepare_full_run_log(self, run_log: RunLog):
    """
    Attach every stored branch log to the step that owns it.

    The run log is modified in place. Branch logs carry their own steps,
    so only the link from owning step to branch has to be rebuilt.

    Args:
        run_log (RunLog): The root run log to populate
    """
    run_id = run_log.run_id

    stored_names = self._list_branch_logs(run_id=run_id)
    if not stored_names:
        return

    # Load everything first so nested branches can find their owners
    # regardless of the order in which files were listed.
    loaded: Dict[str, BranchLog] = {}
    for stored_name in stored_names:
        raw = self._retrieve(run_id=run_id, name=stored_name)
        branch = BranchLog.model_validate(raw)
        loaded[branch.internal_name] = branch

    for internal_name, branch in loaded.items():
        # "conditional.heads" is owned by step "conditional";
        # "map.a.nested" is owned by step "map.a".
        owner_step_name, dot, _leaf = internal_name.rpartition(".")
        if not dot:
            # Branch names need at least the step.branch format
            continue

        # The owning step lives either in another branch log or, failing
        # that, in the root run log.
        enclosing = self._get_parent_branch(owner_step_name)
        container = (
            loaded[enclosing] if enclosing and enclosing in loaded else run_log
        )
        owner_step = container.steps.get(owner_step_name)

        if owner_step:
            owner_step.branches[internal_name] = branch
238
+
239
def create_run_log(
    self,
    run_id: str,
    dag_hash: str = "",
    use_cached: bool = False,
    tag: str = "",
    original_run_id: str = "",
    status: str = defaults.CREATED,
):
    """
    Create a new run log, store it and return it.

    ``use_cached`` and ``original_run_id`` are accepted but not used by
    this implementation.

    Args:
        run_id (str): The run id to create the log for
        dag_hash (str, optional): Stored on the run log. Defaults to ''.
        use_cached (bool, optional): Unused here. Defaults to False.
        tag (str, optional): Stored on the run log. Defaults to ''.
        original_run_id (str, optional): Unused here. Defaults to ''.
        status (str, optional): Initial status. Defaults to defaults.CREATED.

    Raises:
        RunLogExistsError: If a run log is already stored for run_id

    Returns:
        RunLog: The newly created run log
    """
    try:
        self.get_run_log_by_id(run_id=run_id, full=False)
    except exceptions.RunLogNotFoundError:
        # Expected path: nothing is stored under this run_id yet.
        pass
    else:
        raise exceptions.RunLogExistsError(run_id=run_id)

    logger.info(f"{self.service_name} Creating a Run Log for : {run_id}")
    fresh_log = RunLog(run_id=run_id, dag_hash=dag_hash, tag=tag, status=status)

    payload = json.loads(fresh_log.model_dump_json())
    self.store(run_id=run_id, contents=payload, log_type=self.LogTypes.RUN_LOG)
    return fresh_log
276
+
277
def get_run_log_by_id(self, run_id: str, full: bool = False) -> RunLog:
    """
    Fetch the run log stored for a run_id.

    Args:
        run_id (str): The run_id of the run
        full (bool): When True, also attach the stored branch logs to
            their owning steps; otherwise return only the root RunLog

    Raises:
        RunLogNotFoundError: If the lookup raises EntityNotFoundError

    Returns:
        RunLog: The RunLog object identified by the run_id
    """
    try:
        logger.info(f"{self.service_name} Getting a Run Log for : {run_id}")
        found = self.retrieve(run_id=run_id, log_type=self.LogTypes.RUN_LOG)

        if full:
            self._prepare_full_run_log(run_log=found)
    except exceptions.EntityNotFoundError as missing:
        raise exceptions.RunLogNotFoundError(run_id) from missing

    return found
302
+
303
def put_run_log(self, run_log: RunLog):
    """
    Store the root run log under its own run_id.

    The log is serialized to JSON and handed to ``store`` as a RUN_LOG.

    Args:
        run_log (RunLog): The Run log of the run
    """
    payload = json.loads(run_log.model_dump_json())
    self.store(
        run_id=run_log.run_id,
        contents=payload,
        log_type=self.LogTypes.RUN_LOG,
    )
322
+
323
def get_parameters(self, run_id: str, internal_branch_name: str = "") -> dict:
    """
    Read the parameters of the run log or of one branch log.

    Args:
        run_id (str): The run_id of the run
        internal_branch_name (str): If provided, read from that branch;
            otherwise read from the root run log

    Returns:
        dict: Parameters from the specified scope
    """
    if not internal_branch_name:
        return self.get_run_log_by_id(run_id=run_id).parameters

    branch = self.retrieve(
        run_id=run_id,
        log_type=self.LogTypes.BRANCH_LOG,
        name=internal_branch_name,
    )
    return branch.parameters
344
+
345
def set_parameters(
    self, run_id: str, parameters: dict, internal_branch_name: str = ""
):
    """
    Update the parameters of the run log or of one branch log.

    The given parameters are merged into the stored ones with
    ``dict.update`` and the owning log is written back.

    Args:
        run_id (str): The run_id of the run
        parameters (dict): Parameters to set
        internal_branch_name (str): If provided, set on that branch;
            otherwise set on the root run log
    """
    if not internal_branch_name:
        root = self.get_run_log_by_id(run_id=run_id)
        root.parameters.update(parameters)
        self.put_run_log(root)
        return

    branch_kind = self.LogTypes.BRANCH_LOG
    branch = self.retrieve(
        run_id=run_id, log_type=branch_kind, name=internal_branch_name
    )
    branch.parameters.update(parameters)
    self.store(
        run_id=run_id,
        log_type=branch_kind,
        contents=json.loads(branch.model_dump_json()),
        name=internal_branch_name,
    )
373
+
374
def get_run_config(self, run_id: str) -> dict:
    """
    Return the run_config recorded on the run log of a run.

    Args:
        run_id (str): The run_id of the run

    Returns:
        dict: The run config used for the run
    """
    return self.get_run_log_by_id(run_id=run_id).run_config
387
+
388
def set_run_config(self, run_id: str, run_config: dict):
    """Merge run_config into the stored run config of the run.

    Args:
        run_id (str): The run_id of the run
        run_config (dict): The run_config of the run
    """
    current = self.get_run_log_by_id(run_id=run_id)
    # dict.update: given keys overwrite, other stored keys are kept.
    current.run_config.update(run_config)
    self.put_run_log(run_log=current)
399
+
400
def get_step_log(self, internal_name: str, run_id: str) -> StepLog:
    """
    Fetch a step log by its dot-path internal name.

    A name without a dot is looked up in the root run log. Any other name
    is looked up in the branch log named by everything before its last dot.

    Args:
        internal_name (str): The internal name of the step log
        run_id (str): The run_id of the run

    Returns:
        StepLog: The step log object for the step defined by the internal naming and run_id

    Raises:
        RunLogNotFoundError: If the run log for run_id is not found in the datastore
        StepLogNotFoundError: If the step log for internal_name is not found in the datastore for run_id
    """
    logger.info(
        f"{self.service_name} Getting the step log: {internal_name} of {run_id}"
    )

    owner_branch = self._get_parent_branch(internal_name)

    if owner_branch:
        # Branch step - its container is the BranchLog
        try:
            container = self.retrieve(
                run_id=run_id,
                log_type=self.LogTypes.BRANCH_LOG,
                name=owner_branch,
            )
        except exceptions.EntityNotFoundError as missing:
            raise exceptions.StepLogNotFoundError(
                run_id=run_id, step_name=internal_name
            ) from missing
    else:
        # Root-level step - its container is the RunLog
        container = self.get_run_log_by_id(run_id=run_id)

    if internal_name not in container.steps:
        raise exceptions.StepLogNotFoundError(
            run_id=run_id, step_name=internal_name
        )
    return container.steps[internal_name]
454
+
455
def add_step_log(self, step_log: StepLog, run_id: str):
    """
    Record a step log on the log that owns it and write that log back.

    The owner is the root RunLog when the step's internal name has no
    dot, otherwise the BranchLog named by everything before the last dot.

    Args:
        step_log (StepLog): The Step log to add
        run_id (str): The run id of the run
    """
    logger.info(f"{self.service_name} Adding step log: {step_log.internal_name}")

    step_name = step_log.internal_name
    owner_branch = self._get_parent_branch(step_name)

    if owner_branch:
        # Branch step - add to BranchLog
        branch_kind = self.LogTypes.BRANCH_LOG
        branch = self.retrieve(
            run_id=run_id, log_type=branch_kind, name=owner_branch
        )
        branch.steps[step_name] = step_log
        self.store(
            run_id=run_id,
            log_type=branch_kind,
            contents=json.loads(branch.model_dump_json()),
            name=owner_branch,
        )
        return

    # Root-level step - add to RunLog
    root = self.get_run_log_by_id(run_id=run_id)
    root.steps[step_name] = step_log
    self.put_run_log(root)
487
+
488
def get_branch_log(
    self, internal_branch_name: str, run_id: str
) -> Union[BranchLog, RunLog]:
    """
    Fetch a branch log by its internal name, or the run log when no
    branch name is given.

    Args:
        internal_branch_name (str): The internal branch name to retrieve.
        run_id (str): The run id of interest

    Raises:
        BranchLogNotFoundError: If the lookup raises EntityNotFoundError

    Returns:
        BranchLog: The branch log or the run log as requested.
    """
    try:
        if internal_branch_name:
            return self.retrieve(
                run_id=run_id,
                log_type=self.LogTypes.BRANCH_LOG,
                name=internal_branch_name,
            )
        # An empty branch name addresses the root run log.
        return self.get_run_log_by_id(run_id=run_id)
    except exceptions.EntityNotFoundError as missing:
        raise exceptions.BranchLogNotFoundError(
            run_id=run_id, branch_name=internal_branch_name
        ) from missing
516
+
517
def add_branch_log(
    self,
    branch_log: Union[BranchLog, RunLog],
    run_id: str,
):
    """
    Store a branch log under its internal name.

    The argument may also be a run log; anything that is not a BranchLog
    is forwarded to ``put_run_log``.

    Args:
        branch_log (BranchLog): The branch log/run log to add to the database
        run_id (str): The run id to which the branch/run log is added
    """
    if isinstance(branch_log, BranchLog):
        branch_name = branch_log.internal_name

        logger.info(
            f"{self.service_name} Adding the branch log to DB: {branch_log.internal_name}"
        )
        self.store(
            run_id=run_id,
            log_type=self.LogTypes.BRANCH_LOG,
            contents=json.loads(branch_log.model_dump_json()),
            name=branch_name,
        )
        return

    self.put_run_log(branch_log)
@@ -0,0 +1,114 @@
1
+ import json
2
+ import logging
3
+ from functools import lru_cache
4
+ from typing import Any, Dict
5
+
6
+ from cloudpathlib import S3Client, S3Path
7
+ from pydantic import Field, SecretStr
8
+
9
+ from extensions.run_log_store.any_path import AnyPathRunLogStore
10
+ from runnable import defaults
11
+ from runnable.datastore import RunLog
12
+
13
+ logger = logging.getLogger(defaults.LOGGER_NAME)
14
+
15
+
16
@lru_cache
def get_minio_client(
    endpoint_url: str, aws_access_key_id: str, aws_secret_access_key: str
) -> S3Client:
    """
    Build an ``S3Client`` for the given endpoint and credentials.

    The function is wrapped in ``lru_cache``, so repeated calls with the
    same three arguments return the cached client.

    Args:
        endpoint_url (str): URL of the S3-compatible endpoint
        aws_access_key_id (str): Access key id, in plain text
        aws_secret_access_key (str): Secret access key, in plain text

    Returns:
        S3Client: The client for that endpoint
    """
    credentials = {
        "aws_access_key_id": aws_access_key_id,
        "aws_secret_access_key": aws_secret_access_key,
    }
    return S3Client(endpoint_url=endpoint_url, **credentials)
25
+
26
+
27
class MinioRunLogStore(AnyPathRunLogStore):
    """
    Run log store that keeps each run log as a JSON object behind an
    S3-compatible endpoint (MinIO), accessed through ``cloudpathlib``.

    The run log is written to ``s3://<bucket>/<run_id>/<run_id>.json``,
    where the folder part comes from the run id of the current context.

    Example config (the ``type`` value is presumably the service name;
    confirm against the package entry points):

    run_log:
      type: minio
      config:
        endpoint_url: URL of the endpoint. Defaults to http://localhost:9002
        aws_access_key_id: Access key id. Defaults to minioadmin
        aws_secret_access_key: Secret access key. Defaults to minioadmin
        bucket: Bucket (and optional prefix) for the logs. Defaults to runnable/run-logs

    """

    service_name: str = "minio"

    endpoint_url: str = Field(default="http://localhost:9002")
    aws_access_key_id: SecretStr = SecretStr(secret_value="minioadmin")
    aws_secret_access_key: SecretStr = SecretStr(secret_value="minioadmin")
    bucket: str = Field(default="runnable/run-logs")

    def get_summary(self) -> Dict[str, Any]:
        """
        Describe the store for display purposes.

        Returns:
            Dict[str, Any]: The service name and the endpoint/bucket location
        """
        summary = {
            "Type": self.service_name,
            "Location": f"{self.endpoint_url}/{self.bucket}",
        }

        return summary

    def get_run_log_bucket(self) -> S3Path:
        """
        Build the S3 folder path that holds the run log of the current run.

        The run id is taken from ``self._context``, not from an argument.

        Returns:
            S3Path: ``s3://<bucket>/<run_id>/`` bound to the cached client
        """
        run_id = self._context.run_id

        return S3Path(
            f"s3://{self.bucket}/{run_id}/",
            client=get_minio_client(
                self.endpoint_url,
                self.aws_access_key_id.get_secret_value(),
                self.aws_secret_access_key.get_secret_value(),
            ),
        )

    def write_to_path(self, run_log: RunLog):
        """
        Write the run log as an indented JSON object to the bucket.

        Args:
            run_log (RunLog): The run log to be added to the database
        """
        run_log_bucket = self.get_run_log_bucket()
        run_log_bucket.mkdir(parents=True, exist_ok=True)

        run_log_object = run_log_bucket / f"{run_log.run_id}.json"

        # model_dump_json() already returns JSON text. Decode it before
        # json.dumps so the stored object is a JSON document and not a
        # JSON-encoded string (which would also make indent a no-op).
        payload = json.loads(run_log.model_dump_json())
        run_log_object.write_text(json.dumps(payload, ensure_ascii=True, indent=4))

    def read_from_path(self, run_id: str) -> RunLog:
        """
        Read the run log object for the run id from the bucket and decode it.

        Both the current format (a JSON object) and the legacy format
        written by earlier versions (a JSON-encoded string wrapping the
        object) are accepted.

        Args:
            run_id (str): The requested run id to retrieve the run log store

        Returns:
            RunLog: The decoded Run log
        """
        run_log_bucket = self.get_run_log_bucket()

        run_log_object = run_log_bucket / f"{run_id}.json"

        payload = json.loads(run_log_object.read_text())
        if isinstance(payload, str):
            # Legacy double-encoded object: unwrap the inner JSON document.
            payload = json.loads(payload)

        return RunLog(**payload)
@@ -0,0 +1,15 @@
1
+ [project]
2
+ name = "run_log_store"
3
+ version = "0.0.0"
4
+ description = "Extensions to run log store"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ dependencies = []
8
+
9
+
10
+ [build-system]
11
+ requires = ["hatchling"]
12
+ build-backend = "hatchling.build"
13
+
14
+ [tool.hatch.build.targets.wheel]
15
+ packages = ["."]
File without changes