runnable-0.50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. extensions/README.md +0 -0
  2. extensions/__init__.py +0 -0
  3. extensions/catalog/README.md +0 -0
  4. extensions/catalog/any_path.py +214 -0
  5. extensions/catalog/file_system.py +52 -0
  6. extensions/catalog/minio.py +72 -0
  7. extensions/catalog/pyproject.toml +14 -0
  8. extensions/catalog/s3.py +11 -0
  9. extensions/job_executor/README.md +0 -0
  10. extensions/job_executor/__init__.py +236 -0
  11. extensions/job_executor/emulate.py +70 -0
  12. extensions/job_executor/k8s.py +553 -0
  13. extensions/job_executor/k8s_job_spec.yaml +37 -0
  14. extensions/job_executor/local.py +35 -0
  15. extensions/job_executor/local_container.py +161 -0
  16. extensions/job_executor/pyproject.toml +16 -0
  17. extensions/nodes/README.md +0 -0
  18. extensions/nodes/__init__.py +0 -0
  19. extensions/nodes/conditional.py +301 -0
  20. extensions/nodes/fail.py +78 -0
  21. extensions/nodes/loop.py +394 -0
  22. extensions/nodes/map.py +477 -0
  23. extensions/nodes/parallel.py +281 -0
  24. extensions/nodes/pyproject.toml +15 -0
  25. extensions/nodes/stub.py +93 -0
  26. extensions/nodes/success.py +78 -0
  27. extensions/nodes/task.py +156 -0
  28. extensions/pipeline_executor/README.md +0 -0
  29. extensions/pipeline_executor/__init__.py +871 -0
  30. extensions/pipeline_executor/argo.py +1266 -0
  31. extensions/pipeline_executor/emulate.py +119 -0
  32. extensions/pipeline_executor/local.py +226 -0
  33. extensions/pipeline_executor/local_container.py +369 -0
  34. extensions/pipeline_executor/mocked.py +159 -0
  35. extensions/pipeline_executor/pyproject.toml +16 -0
  36. extensions/run_log_store/README.md +0 -0
  37. extensions/run_log_store/__init__.py +0 -0
  38. extensions/run_log_store/any_path.py +100 -0
  39. extensions/run_log_store/chunked_fs.py +122 -0
  40. extensions/run_log_store/chunked_minio.py +141 -0
  41. extensions/run_log_store/file_system.py +91 -0
  42. extensions/run_log_store/generic_chunked.py +549 -0
  43. extensions/run_log_store/minio.py +114 -0
  44. extensions/run_log_store/pyproject.toml +15 -0
  45. extensions/secrets/README.md +0 -0
  46. extensions/secrets/dotenv.py +62 -0
  47. extensions/secrets/pyproject.toml +15 -0
  48. runnable/__init__.py +108 -0
  49. runnable/catalog.py +141 -0
  50. runnable/cli.py +484 -0
  51. runnable/context.py +730 -0
  52. runnable/datastore.py +1058 -0
  53. runnable/defaults.py +159 -0
  54. runnable/entrypoints.py +390 -0
  55. runnable/exceptions.py +137 -0
  56. runnable/executor.py +561 -0
  57. runnable/gantt.py +1646 -0
  58. runnable/graph.py +501 -0
  59. runnable/names.py +546 -0
  60. runnable/nodes.py +593 -0
  61. runnable/parameters.py +217 -0
  62. runnable/pickler.py +96 -0
  63. runnable/sdk.py +1277 -0
  64. runnable/secrets.py +92 -0
  65. runnable/tasks.py +1268 -0
  66. runnable/telemetry.py +142 -0
  67. runnable/utils.py +423 -0
  68. runnable-0.50.0.dist-info/METADATA +189 -0
  69. runnable-0.50.0.dist-info/RECORD +72 -0
  70. runnable-0.50.0.dist-info/WHEEL +4 -0
  71. runnable-0.50.0.dist-info/entry_points.txt +53 -0
  72. runnable-0.50.0.dist-info/licenses/LICENSE +201 -0
runnable/telemetry.py ADDED
@@ -0,0 +1,142 @@
+ """
+ Telemetry support for runnable pipelines.
+
+ Uses logfire-api for zero-dependency instrumentation.
+ If logfire is installed, spans are emitted. If not, all calls are no-ops.
+
+ For real-time streaming (e.g., FastAPI SSE), use StreamingSpanProcessor.
+ """
+
+ import json
+ from contextvars import ContextVar
+ from queue import Queue
+ from typing import Any, Optional
+
+ import logfire_api as logfire  # noqa: F401 - re-exported for convenience
+
+ # Context var for the active stream queue (set by FastAPI when SSE is active)
+ _stream_queue: ContextVar[Optional[Queue]] = ContextVar("stream_queue", default=None)
+
+
+ def truncate_value(value: Any, max_bytes: int = 256) -> Any:
+     """
+     Truncate the serialized form of a single value to max_bytes.
+
+     Args:
+         value: Any JSON-serializable value
+         max_bytes: Maximum length for the string representation
+
+     Returns:
+         The JSON-serialized string, truncated if it exceeds max_bytes
+     """
+     try:
+         serialized = json.dumps(value, default=str)
+         if len(serialized) > max_bytes:
+             # Return a truncated string representation
+             return serialized[: max_bytes - 3] + "..."
+         return serialized
+     except Exception:
+         return f"<unserializable: {type(value).__name__}>"
+
+
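Note that the function returns the JSON-serialized string rather than the original value, so callers always get something printable. A minimal usage sketch, assuming the module is importable as runnable.telemetry:

    from runnable.telemetry import truncate_value

    print(truncate_value({"a": 1}))                 # '{"a": 1}'
    print(truncate_value("x" * 300, max_bytes=16))  # '"xxxxxxxxxxxx...'
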
+ def set_stream_queue(q: Optional[Queue]) -> None:
+     """
+     Set the queue for streaming spans.
+
+     Called by a FastAPI endpoint to enable real-time span streaming.
+
+     Args:
+         q: Queue to push span data to, or None to disable streaming
+     """
+     _stream_queue.set(q)
+
+
+ def get_stream_queue() -> Optional[Queue]:
+     """
+     Get the current stream queue.
+
+     Returns:
+         The active Queue if SSE streaming is enabled, None otherwise
+     """
+     return _stream_queue.get()
+
+
+ # Optional OTEL imports for the streaming processor
+ try:
+     from opentelemetry.sdk.trace import ReadableSpan, SpanProcessor
+
+     OTEL_AVAILABLE = True
+ except ImportError:
+     OTEL_AVAILABLE = False
+     SpanProcessor = object  # type: ignore
+     ReadableSpan = object  # type: ignore
+
+
+ if OTEL_AVAILABLE:
+
+     class StreamingSpanProcessor(SpanProcessor):
+         """
+         SpanProcessor that:
+         1. Always forwards to the base processor (collector export) if provided
+         2. Also pushes to the stream queue if SSE is active
+
+         This enables dual output: persistent collector storage AND
+         real-time streaming to the UI.
+         """
+
+         def __init__(self, base_processor: Optional[SpanProcessor] = None):
+             """
+             Initialize the streaming processor.
+
+             Args:
+                 base_processor: Optional underlying processor for collector export
+             """
+             self.base_processor = base_processor
+
+         def on_start(self, span, parent_context=None):
+             """Called when a span starts."""
+             if self.base_processor:
+                 self.base_processor.on_start(span, parent_context)
+
+             q = _stream_queue.get()
+             if q is not None:
+                 q.put_nowait(
+                     {
+                         "type": "span_start",
+                         "name": span.name,
+                         "span_id": format(span.context.span_id, "016x"),
+                     }
+                 )
+
+         def on_end(self, span: ReadableSpan):
+             """Called when a span ends."""
+             if self.base_processor:
+                 self.base_processor.on_end(span)
+
+             q = _stream_queue.get()
+             if q is not None:
+                 q.put_nowait(
+                     {
+                         "type": "span_end",
+                         "name": span.name,
+                         "span_id": format(span.context.span_id, "016x"),
+                         "status": span.status.status_code.name,
+                         # end_time and start_time are in nanoseconds
+                         "duration_ms": (span.end_time - span.start_time)  # type: ignore
+                         / 1_000_000,  # ty: ignore
+                         "attributes": dict(span.attributes) if span.attributes else {},
+                     }
+                 )
+
+         def shutdown(self):
+             """Shutdown the processor."""
+             if self.base_processor:
+                 self.base_processor.shutdown()
+
+         def force_flush(self, timeout_millis=None):
+             """Force flush any pending spans."""
+             if self.base_processor:
+                 self.base_processor.force_flush(timeout_millis)  # ty: ignore
+
+ else:
+     # Placeholder when OTEL is not installed
+     StreamingSpanProcessor = None  # type: ignore
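
How the pieces fit together: a minimal sketch of the dual-output setup, not code from the package. The TracerProvider/BatchSpanProcessor plumbing is standard OpenTelemetry SDK usage; ConsoleSpanExporter stands in for a real collector exporter, and the drain loop stands in for a FastAPI SSE generator.

    from queue import Queue

    from opentelemetry import trace
    from opentelemetry.sdk.trace import TracerProvider
    from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter

    from runnable.telemetry import StreamingSpanProcessor, set_stream_queue

    # Wrap the export processor so spans are also mirrored to the stream queue.
    base = BatchSpanProcessor(ConsoleSpanExporter())
    provider = TracerProvider()
    provider.add_span_processor(StreamingSpanProcessor(base_processor=base))
    trace.set_tracer_provider(provider)

    events: Queue = Queue()
    set_stream_queue(events)  # an SSE endpoint would do this per request

    with trace.get_tracer("demo").start_as_current_span("pipeline-step"):
        pass  # pipeline work happens here

    while not events.empty():
        print(events.get_nowait())  # span_start / span_end dicts

    set_stream_queue(None)  # disable streaming when the client disconnects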
runnable/utils.py ADDED
@@ -0,0 +1,423 @@
+ from __future__ import annotations
+
+ import hashlib
+ import json
+ import logging
+ import os
+ import random
+ import string
+ import subprocess
+ import time
+ from collections import OrderedDict
+ from pathlib import Path
+ from string import Template as str_template
+ from typing import Any, Dict, Optional, Tuple, Union
+
+ from ruamel.yaml import YAML
+
+ import runnable.context as context
+ from runnable import console, defaults
+ from runnable.defaults import IterableParameterModel
+
+ logger = logging.getLogger(defaults.LOGGER_NAME)
+ logging.getLogger("stevedore").setLevel(logging.CRITICAL)
+
+
+ def does_file_exist(file_path: str) -> bool:
+     """Check if a file exists.
+     Implemented here to avoid repetition of logic.
+
+     Args:
+         file_path (str): The file path to check
+
+     Returns:
+         bool: True if the file exists, False otherwise
+     """
+     my_file = Path(file_path)
+     return my_file.is_file()
+
+
+ def does_dir_exist(file_path: Union[str, Path]) -> bool:
+     """Check if a directory exists.
+     Implemented here to avoid repetition of logic.
+
+     Args:
+         file_path (str or Path): The directory path to check
+
+     Returns:
+         bool: True if the directory exists, False otherwise
+     """
+     my_file = Path(file_path)
+     return my_file.is_dir()
+
+
+ def safe_make_dir(directory: Union[str, Path]):
+     """Safely make the directory.
+     Ignore if it exists and create the parents if necessary.
+
+     Args:
+         directory (str or Path): The directory path to create
+     """
+     Path(directory).mkdir(parents=True, exist_ok=True)
+
+
+ def apply_variables(
+     apply_to: Dict[str, Any], variables: Dict[str, str]
+ ) -> Dict[str, Any]:
+     """Safely apply variables to a config.
+
+     For example, for the config {'a': '${b}'}, the placeholder ${b} is
+     replaced by the value of b from the variables.
+
+     If ${b} does not exist in the variables, the placeholder is left as-is.
+
+     Args:
+         apply_to (dict): The config to apply variables to
+         variables (dict): The variables as key-value pairs
+
+     Raises:
+         Exception: If variables is not a dict
+
+     Returns:
+         dict: A transformed dict with variables applied
+     """
+     if not isinstance(variables, dict):
+         raise Exception("Argument variables should be a dict")
+
+     json_d = json.dumps(apply_to)
+     string_template = str_template(json_d)
+
+     template = string_template.safe_substitute(variables)
+
+     if "$" in template:
+         logger.warning(
+             "Not all placeholders in the config could be resolved from the variables"
+         )
+
+     return json.loads(template)
+
+
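A worked example of the substitution (a sketch, assuming runnable.utils is importable): placeholders with no matching variable survive the round-trip untouched, which is what triggers the warning above.

    from runnable.utils import apply_variables

    config = {"image": "repo/app:${tag}", "cpu": "${cpu}"}
    print(apply_variables(config, {"tag": "v1.2"}))
    # {'image': 'repo/app:v1.2', 'cpu': '${cpu}'} -- plus a logged warning
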
+ def get_module_and_attr_names(command: str) -> Tuple[str, str]:
+     """Given a string of the form module.function, return the module and function names.
+
+     It also checks that the string is of the expected 'module.func' format.
+
+     Args:
+         command (str): String of format module.function_name
+
+     Raises:
+         Exception: If the string is not of the expected format
+
+     Returns:
+         Tuple[str, str]: (module_name, function_name) extracted from the input string
+     """
+     mods = command.split(".")
+     if len(mods) <= 1:
+         raise Exception("The command should be a function to call")
+     func = mods[-1]
+     module = ".".join(mods[:-1])
+     return module, func
+
+
+ def load_yaml(file_path: str, load_type: str = "safe") -> Dict[str, Any]:
+     """Load a YAML file and return the dictionary.
+
+     Args:
+         file_path (str): The path of the YAML file
+         load_type (str, optional): The load type as understood by ruamel. Defaults to 'safe'.
+
+     Returns:
+         dict: The mapping as defined in the YAML file
+     """
+     with open(file_path, encoding="utf-8") as f:
+         yaml = YAML(typ=load_type, pure=True)
+         yaml_config = yaml.load(f)
+     return yaml_config
+
+
+ def is_a_git_repo() -> bool:
+     """Run a git command to see if the project is git versioned.
+
+     Returns:
+         bool: True if it is git versioned, False otherwise
+     """
+     command = "git rev-parse --is-inside-work-tree"
+     try:
+         subprocess.check_output(command.split()).strip().decode("utf-8")
+         logger.info("Found the code to be git versioned")
+         return True
+     except BaseException:  # pylint: disable=W0702
+         console.print("Not a git repo", style="bold red")
+
+     return False
+
+
+ def get_current_code_commit() -> Union[str, None]:
+     """Get the git SHA of HEAD if the project is version controlled.
+
+     Returns:
+         Union[str, None]: The SHA if the code is versioned, None otherwise
+     """
+     command = "git rev-parse HEAD"
+     if not is_a_git_repo():
+         return None
+     try:
+         label = subprocess.check_output(command.split()).strip().decode("utf-8")
+         logger.info("Found the git commit to be: %s", label)
+         return label
+     except BaseException:  # pylint: disable=W0702
+         console.print("Not a git repo, error getting hash", style="bold red")
+         raise
+
+
+ def is_git_clean() -> Tuple[bool, Union[None, str]]:
+     """Check if the git tree is clean, with no modified tracked files.
+
+     Returns:
+         Tuple[bool, Union[None, str]]: (True, None) if the tree is clean,
+         (False, newline-separated file names) if tracked files were modified
+     """
+     command = "git diff --name-only"
+     if not is_a_git_repo():
+         return False, None
+     try:
+         label = subprocess.check_output(command.split()).strip().decode("utf-8")
+         if not label:
+             return True, None
+         return False, label
+     except BaseException:  # pylint: disable=W0702
+         console.print("Not a git repo, not clean", style="bold red")
+
+     return False, None
+
+
+ def get_git_remote() -> Union[str, None]:
+     """Get the remote URL of the git repository.
+
+     Returns:
+         Union[str, None]: The remote URL if the code is version controlled, None otherwise
+     """
+     command = "git config --get remote.origin.url"
+     if not is_a_git_repo():
+         return None
+     try:
+         label = subprocess.check_output(command.split()).strip().decode("utf-8")
+         logger.info("Found the git remote to be: %s", label)
+         return label
+     except BaseException:  # pylint: disable=W0702
+         console.print("Not a git repo, no remote", style="bold red")
+         raise
+
+
+ def get_local_docker_image_id(image_name: str) -> str:
+     """If we are running in local settings, return the docker image id.
+
+     Args:
+         image_name (str): The image name we need the digest for
+
+     Returns:
+         str: The docker image digest, or an empty string if it cannot be determined
+     """
+     try:
+         import docker
+
+         client = docker.from_env()
+         image = client.images.get(image_name)
+         return image.attrs["Id"]
+     except ImportError:  # pragma: no cover
+         logger.warning(
+             "Did not find docker installed, some functionality might be affected"
+         )
+     except BaseException:
+         logger.exception(f"Could not find the image by name {image_name}")
+
+     return ""
+
+
+ def get_git_code_identity():
+     """Return a code identity object for version controlled code.
+
+     Returns:
+         runnable.datastore.CodeIdentity: The code identity used by the run log store.
+     """
+     current_context = context.get_run_context()
+     if current_context is None:
+         raise RuntimeError("No run context available")
+     code_identity = current_context.run_log_store.create_code_identity()
+     try:
+         code_identity.code_identifier = get_current_code_commit()
+         code_identity.code_identifier_type = "git"
+         code_identity.code_identifier_dependable, changed = is_git_clean()
+         code_identity.code_identifier_url = get_git_remote()
+         if changed:
+             code_identity.code_identifier_message = "changes found in " + ", ".join(
+                 changed.split("\n")
+             )
+     except BaseException:
+         logger.exception("Git code versioning problems")
+
+     return code_identity
+
+
+ def remove_prefix(text: str, prefix: str) -> str:
+     """Remove a prefix if one is present in the input text.
+
+     Args:
+         text (str): The input text to remove the prefix from
+         prefix (str): The prefix to remove
+
+     Returns:
+         str: The string with the prefix removed if present, the original string otherwise
+     """
+     if text.startswith(prefix):
+         return text[len(prefix) :]
+     return text
+
+
+ def diff_dict(d1: Dict[str, Any], d2: Dict[str, Any]) -> Dict[str, Any]:
+     """
+     Given two dicts d1 and d2, return a new dict with the upsert items from d2.
+
+     Args:
+         d1 (dict): The reference dict
+         d2 (dict): The dict to compare against the reference
+
+     Returns:
+         dict: Any new or modified items in d2 in comparison to d1
+     """
+     diff = {}
+
+     for k2, v2 in d2.items():
+         # Keep keys that are new or whose values differ from the reference
+         if k2 not in d1 or d1[k2] != v2:
+             diff[k2] = v2
+
+     return diff
+
+
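A quick check of the upsert semantics (a sketch, assuming runnable.utils is importable): new keys and changed values come back, unchanged items do not.

    from runnable.utils import diff_dict

    d1 = {"a": 1, "b": 2}
    d2 = {"a": 1, "b": 3, "c": 4}
    print(diff_dict(d1, d2))  # {'b': 3, 'c': 4}
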
+ def get_data_hash(file_name: str) -> str:
+     """Return the hash of the data file.
+
+     For small files (< 1GB): returns the full SHA256 hash.
+     For large files (>= 1GB): returns a fingerprint hash of the first chunk,
+     the last chunk and the file size.
+
+     Args:
+         file_name (str): The file name to generate the hash for
+
+     Raises:
+         FileNotFoundError: If the file does not exist
+         PermissionError: If the file cannot be read due to permissions
+         OSError: If there are other I/O errors
+
+     Returns:
+         str: The SHA256 hash or fingerprint of the file contents
+     """
+     start_time = time.time()
+
+     try:
+         file_path = Path(file_name)
+         file_size = file_path.stat().st_size
+
+         # Use the appropriate algorithm based on file size
+         if file_size < defaults.LARGE_FILE_THRESHOLD_BYTES:
+             result = _compute_full_file_hash(file_name)
+             logger.debug(
+                 f"Full hash computed for {file_name} ({file_size} bytes) in {time.time() - start_time:.3f}s"
+             )
+         else:
+             result = _compute_large_file_fingerprint(file_name, file_size)
+             logger.info(
+                 f"Fingerprint hash computed for {file_name} ({file_size} bytes) in {time.time() - start_time:.3f}s"
+             )
+
+         return result
+     except FileNotFoundError:
+         logger.error(f"File not found: {file_name}")
+         raise
+     except PermissionError:
+         logger.error(f"Permission denied accessing file: {file_name}")
+         raise
+     except OSError as e:
+         logger.error(f"I/O error accessing file {file_name}: {e}")
+         raise
+
+
+ def _compute_full_file_hash(file_name: str) -> str:
+     """Compute the SHA256 hash of the entire file using a streaming approach."""
+     with open(file_name, "rb") as f:
+         file_hash = hashlib.sha256()
+         for chunk in iter(lambda: f.read(4096), b""):
+             file_hash.update(chunk)
+     return file_hash.hexdigest()
+
+
+ def _compute_large_file_fingerprint(file_name: str, file_size: int) -> str:
+     """Compute a fingerprint hash for large files from the first/last chunks plus metadata."""
+     with open(file_name, "rb") as f:
+         file_hash = hashlib.sha256()
+
+         # Include the file size in the hash for uniqueness
+         file_hash.update(str(file_size).encode())
+
+         # Read the first chunk
+         first_chunk = f.read(defaults.HASH_CHUNK_SIZE)
+         file_hash.update(first_chunk)
+
+         # Read a trailing chunk if the file extends beyond the first chunk
+         if file_size > defaults.HASH_CHUNK_SIZE:
+             f.seek(-min(defaults.HASH_CHUNK_SIZE, file_size - len(first_chunk)), 2)
+             last_chunk = f.read(defaults.HASH_CHUNK_SIZE)
+             file_hash.update(last_chunk)
+
+     return file_hash.hexdigest()
+
+
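The fingerprint trades collision resistance for O(1) reads on huge files: two files of equal size that agree on their first and last chunks hash identically even if they differ in the middle. A toy analogue of the strategy (hypothetical helper, not part of the package) makes the trade-off concrete:

    import hashlib

    def toy_fingerprint(data: bytes, chunk: int = 4) -> str:
        # size + first chunk + last chunk, echoing _compute_large_file_fingerprint
        h = hashlib.sha256()
        h.update(str(len(data)).encode())
        h.update(data[:chunk])
        if len(data) > chunk:
            h.update(data[-chunk:])
        return h.hexdigest()

    # Same size, same head and tail, different middle: the fingerprints collide.
    print(toy_fingerprint(b"abcdXXXXwxyz") == toy_fingerprint(b"abcdYYYYwxyz"))  # True
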
+ def json_to_ordered_dict(json_str: str) -> OrderedDict:
+     """Decode a JSON string into an OrderedDict.
+
+     Args:
+         json_str (str): The JSON string to decode
+
+     Returns:
+         OrderedDict: The decoded OrderedDict
+     """
+     if json_str and json_str != "{}":
+         return json.loads(json_str, object_pairs_hook=OrderedDict)
+
+     return OrderedDict()
+
+
+ def gather_variables() -> Dict[str, str]:
+     """Gather all the environment variables used by runnable, i.e. those
+     starting with runnable_VAR_.
+
+     Returns:
+         dict: All matching environment variables, keyed by name with the prefix stripped.
+     """
+     variables = {}
+
+     for env_var, value in os.environ.items():
+         if env_var.startswith(defaults.VARIABLE_PREFIX):
+             key = remove_prefix(env_var, defaults.VARIABLE_PREFIX)
+             variables[key] = value
+
+     return variables
+
+
+ def make_log_file_name(
+     name: str,
+     iter_variable: Optional[IterableParameterModel] = None,
+ ) -> str:
+     """Build a unique, filesystem-safe execution log file name."""
+     random_tag = "".join(random.choices(string.ascii_uppercase + string.digits, k=3))
+     log_file_name = name
+
+     if iter_variable and iter_variable.map_variable:
+         for _, value in iter_variable.map_variable.items():
+             log_file_name += "_" + str(value)
+
+     log_file_name += "_" + random_tag
+     log_file_name = "".join(x for x in log_file_name if x.isalnum()) + ".execution.log"
+
+     return log_file_name
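
gather_variables pairs naturally with apply_variables: environment variables supply the values for the ${...} placeholders. A minimal sketch, assuming the prefix is runnable_VAR_ as the docstring states (the canonical value lives in defaults.VARIABLE_PREFIX):

    import os

    from runnable.utils import apply_variables, gather_variables

    os.environ["runnable_VAR_tag"] = "v1.2"

    variables = gather_variables()  # {'tag': 'v1.2'}
    print(apply_variables({"image": "repo/app:${tag}"}, variables))
    # {'image': 'repo/app:v1.2'}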