runnable-0.9.1-py3-none-any.whl → runnable-0.11.0-py3-none-any.whl

This diff shows the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
runnable/__init__.py CHANGED
@@ -4,26 +4,30 @@
 import logging
 from logging.config import dictConfig
 
+from rich.console import Console
+
 from runnable import defaults
 
 dictConfig(defaults.LOGGING_CONFIG)
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
+console = Console()
+console.print(":runner: Lets go!!")
 
-from runnable.sdk import (
-    Stub,
-    Pipeline,
-    Parallel,
-    Map,
+from runnable.sdk import (  # noqa
     Catalog,
-    Success,
     Fail,
-    PythonTask,
+    Map,
     NotebookTask,
+    Parallel,
+    Pipeline,
+    PythonTask,
     ShellTask,
+    Stub,
+    Success,
+    metric,
     pickled,
-)  # noqa
-
+)
 
 # TODO: Think of model registry as a central place to store models.
 # TODO: Implement Sagemaker pipelines as a executor.
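
With this change, the package root exposes a shared rich Console alongside the alphabetised SDK exports, which now include metric. A minimal sketch, assuming runnable 0.11.0 is installed; note the ":runner:" banner prints as a side effect of the import:

# Everything below is importable straight from the package root after
# this change; importing runnable also prints the ":runner:" banner.
from runnable import PythonTask, Stub, console, metric  # noqa: F401

# Reusing the shared console keeps user output on the same stream the
# library itself prints to.
console.print("hello from the shared console")
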
runnable/catalog.py CHANGED
@@ -1,6 +1,6 @@
 import logging
 from abc import ABC, abstractmethod
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 
 from pydantic import BaseModel, ConfigDict
 
@@ -25,6 +25,10 @@ class BaseCatalog(ABC, BaseModel):
     service_type: str = "catalog"
     model_config = ConfigDict(extra="forbid")
 
+    @abstractmethod
+    def get_summary(self) -> Dict[str, Any]:
+        ...
+
     @property
     def _context(self):
         return context.run_context
@@ -112,6 +116,9 @@ class DoNothingCatalog(BaseCatalog):
 
     service_name: str = "do-nothing"
 
+    def get_summary(self) -> Dict[str, Any]:
+        return {}
+
     def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
         """
         Does nothing
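
Since get_summary is declared abstract on BaseCatalog, third-party catalog implementations must now provide it. A minimal sketch of a conforming subclass; MyCatalog, its service name, and its summary payload are hypothetical, and the other abstract members of BaseCatalog are elided:

from typing import Any, Dict

from runnable.catalog import BaseCatalog


class MyCatalog(BaseCatalog):
    service_name: str = "my-catalog"  # hypothetical

    def get_summary(self) -> Dict[str, Any]:
        # Report whatever locates this catalog's artifacts, mirroring
        # the key FileSystemCatalog uses further below.
        return {"Catalog Location": "/tmp/my-catalog"}

    # get(), put() and any other abstract members of BaseCatalog still
    # need concrete implementations before this class is instantiable.
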
runnable/cli.py CHANGED
@@ -60,6 +60,7 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id):  # pragma: no cover
         provided
     """
     logger.setLevel(log_level)
+
     entrypoints.execute(
         configuration_file=config_file,
         pipeline_file=file,
runnable/context.py CHANGED
@@ -1,11 +1,11 @@
 from typing import Dict, Optional
 
-from pydantic import BaseModel, SerializeAsAny
+from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
+from rich.progress import Progress
 
 from runnable.catalog import BaseCatalog
 from runnable.datastore import BaseRunLogStore
 from runnable.executor import BaseExecutor
-from runnable.experiment_tracker import BaseExperimentTracker
 from runnable.graph import Graph
 from runnable.pickler import BasePickler
 from runnable.secrets import BaseSecrets
@@ -16,8 +16,10 @@ class Context(BaseModel):
     run_log_store: SerializeAsAny[BaseRunLogStore]
     secrets_handler: SerializeAsAny[BaseSecrets]
     catalog_handler: SerializeAsAny[BaseCatalog]
-    experiment_tracker: SerializeAsAny[BaseExperimentTracker]
     pickler: SerializeAsAny[BasePickler]
+    progress: SerializeAsAny[Optional[Progress]] = Field(default=None, exclude=True)
+
+    model_config = ConfigDict(arbitrary_types_allowed=True)
 
     pipeline_file: Optional[str] = ""
     parameters_file: Optional[str] = ""
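
The new model_config line is what makes the progress field legal: rich.progress.Progress is not a pydantic-aware type, so pydantic v2 rejects the annotation unless the model opts in to arbitrary types. The same pattern in isolation, as a sketch (Holder is illustrative):

from typing import Optional

from pydantic import BaseModel, ConfigDict, Field
from rich.progress import Progress


class Holder(BaseModel):
    # Without arbitrary_types_allowed, pydantic raises a schema error
    # at class-definition time for the Progress annotation.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # exclude=True keeps the live Progress object out of serialized
    # dumps, mirroring Context.progress above.
    progress: Optional[Progress] = Field(default=None, exclude=True)


holder = Holder(progress=Progress())
print(holder.model_dump())  # {} -- the excluded field is omitted
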
runnable/datastore.py CHANGED
@@ -4,23 +4,29 @@ import logging
 import os
 from abc import ABC, abstractmethod
 from datetime import datetime
-from typing import Annotated, Any, Dict, List, Literal, Optional, OrderedDict, Tuple, Union
+from typing import (
+    Annotated,
+    Any,
+    Dict,
+    List,
+    Literal,
+    Optional,
+    OrderedDict,
+    Tuple,
+    Union,
+)
 
 from pydantic import BaseModel, Field, computed_field
-from typing_extensions import TypeAliasType
 
 import runnable.context as context
 from runnable import defaults, exceptions
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
 
-# Once defined these classes are sealed to any additions unless a default is provided
-# Breaking this rule might make runnable backwardly incompatible
 
-JSONType = TypeAliasType(
-    "JSONType",
-    Union[bool, int, float, str, None, List["JSONType"], Dict[str, "JSONType"]],  # type: ignore
-)
+JSONType = Union[
+    str, int, float, bool, List[Any], Dict[str, Any]
+]  # This is actually JSONType, but pydantic doesn't support TypeAlias yet
 
 
 class DataCatalog(BaseModel, extra="allow"):
@@ -62,10 +68,29 @@ The theory behind reduced:
 
 class JsonParameter(BaseModel):
     kind: Literal["json"]
-    value: JSONType  # type: ignore
+    value: JSONType
+    reduced: bool = True
+
+    @computed_field  # type: ignore
+    @property
+    def description(self) -> JSONType:
+        return self.value
+
+    def get_value(self) -> JSONType:
+        return self.value
+
+
+class MetricParameter(BaseModel):
+    kind: Literal["metric"]
+    value: JSONType
     reduced: bool = True
 
-    def get_value(self) -> JSONType:  # type: ignore
+    @computed_field  # type: ignore
+    @property
+    def description(self) -> JSONType:
+        return self.value
+
+    def get_value(self) -> JSONType:
         return self.value
 
 
@@ -100,7 +125,7 @@ class ObjectParameter(BaseModel):
         os.remove(self.file_name)  # Remove after loading
 
 
-Parameter = Annotated[Union[JsonParameter, ObjectParameter], Field(discriminator="kind")]
+Parameter = Annotated[Union[JsonParameter, ObjectParameter, MetricParameter], Field(discriminator="kind")]
 
 
 class StepAttempt(BaseModel):
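
With kind as the discriminator, pydantic now routes raw dicts to JsonParameter, ObjectParameter, or the new MetricParameter automatically. A sketch of the round trip, assuming pydantic v2 (ParameterBag is an illustrative container, not part of runnable):

from typing import Dict

from pydantic import BaseModel, Field

from runnable.datastore import Parameter


class ParameterBag(BaseModel):
    parameters: Dict[str, Parameter] = Field(default_factory=dict)


bag = ParameterBag.model_validate(
    {
        "parameters": {
            "learning_rate": {"kind": "json", "value": 0.01},
            "auc": {"kind": "metric", "value": 0.93},
        }
    }
)
print(type(bag.parameters["auc"]).__name__)  # MetricParameter
print(bag.parameters["auc"].description)  # 0.93
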
@@ -115,6 +140,7 @@ class StepAttempt(BaseModel):
     message: str = ""
     input_parameters: Dict[str, Parameter] = Field(default_factory=dict)
     output_parameters: Dict[str, Parameter] = Field(default_factory=dict)
+    user_defined_metrics: Dict[str, Parameter] = Field(default_factory=dict)
 
     @property
     def duration(self):
@@ -149,10 +175,43 @@ class StepLog(BaseModel):
     mock: bool = False
     code_identities: List[CodeIdentity] = Field(default_factory=list)
     attempts: List[StepAttempt] = Field(default_factory=list)
-    user_defined_metrics: Dict[str, Any] = Field(default_factory=dict)
     branches: Dict[str, BranchLog] = Field(default_factory=dict)
     data_catalog: List[DataCatalog] = Field(default_factory=list)
 
+    def get_summary(self) -> Dict[str, Any]:
+        """
+        Summarize the step log to log
+        """
+        summary: Dict[str, Any] = {}
+
+        summary["Name"] = self.internal_name
+        summary["Input catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "get"]
+        summary["Available parameters"] = [
+            (p, v.description) for attempt in self.attempts for p, v in attempt.input_parameters.items()
+        ]
+
+        summary["Output catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "put"]
+        summary["Output parameters"] = [
+            (p, v.description) for attempt in self.attempts for p, v in attempt.output_parameters.items()
+        ]
+
+        summary["Metrics"] = [
+            (p, v.description) for attempt in self.attempts for p, v in attempt.user_defined_metrics.items()
+        ]
+
+        cis = []
+        for ci in self.code_identities:
+            message = f"{ci.code_identifier_type}:{ci.code_identifier}"
+            if not ci.code_identifier_dependable:
+                message += " but is not dependable"
+            cis.append(message)
+
+        summary["Code identities"] = cis
+
+        summary["status"] = self.status
+
+        return summary
+
     def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
         """
         Given a stage, return the data catalogs according to the stage
@@ -242,6 +301,22 @@ class RunLog(BaseModel):
     parameters: Dict[str, Parameter] = Field(default_factory=dict)
     run_config: Dict[str, Any] = Field(default_factory=dict)
 
+    def get_summary(self) -> Dict[str, Any]:
+        summary: Dict[str, Any] = {}
+
+        _context = context.run_context
+
+        summary["Unique execution id"] = self.run_id
+        summary["status"] = self.status
+
+        summary["Catalog Location"] = _context.catalog_handler.get_summary()
+        summary["Full Run log present at: "] = _context.run_log_store.get_summary()
+
+        summary["Final Parameters"] = {p: v.description for p, v in self.parameters.items()}
+        summary["Collected metrics"] = {p: v.description for p, v in self.parameters.items() if v.kind == "metric"}
+
+        return summary
+
     def get_data_catalogs_by_stage(self, stage: str = "put") -> List[DataCatalog]:
         """
         Return all the cataloged data by the stage at which they were cataloged.
@@ -360,6 +435,10 @@ class BaseRunLogStore(ABC, BaseModel):
     service_name: str = ""
     service_type: str = "run_log_store"
 
+    @abstractmethod
+    def get_summary(self) -> Dict[str, Any]:
+        ...
+
     @property
     def _context(self):
         return context.run_context
@@ -693,6 +772,11 @@ class BufferRunLogstore(BaseRunLogStore):
     service_name: str = "buffered"
     run_log: Optional[RunLog] = Field(default=None, exclude=True)  # For a buffered Run Log, this is the database
 
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {"Type": self.service_name, "Location": "Not persisted"}
+
+        return summary
+
     def create_run_log(
         self,
         run_id: str,
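
The buffered store is the quickest way to see the new get_summary contract in action, since it needs no backing storage; this sketch assumes the store's remaining fields keep their defaults:

from runnable.datastore import BufferRunLogstore

store = BufferRunLogstore()
print(store.get_summary())
# {'Type': 'buffered', 'Location': 'Not persisted'}
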
runnable/defaults.py CHANGED
@@ -1,17 +1,10 @@
-# mypy: ignore-errors
-# The above should be done until https://github.com/python/mypy/issues/8823
 from enum import Enum
+from typing import TypedDict  # type: ignore[unused-ignore]
 from typing import Any, Dict, Mapping, Optional, Union
 
+from rich.style import Style
 from typing_extensions import TypeAlias
 
-# TODO: This is not the correct way to do this.
-try:  # pragma: no cover
-    from typing import TypedDict  # type: ignore[unused-ignore]
-except ImportError:  # pragma: no cover
-    from typing_extensions import TypedDict  # type: ignore[unused-ignore]
-
-
 NAME = "runnable"
 LOGGER_NAME = "runnable"
 
@@ -182,3 +175,10 @@ LOGGING_CONFIG = {
         LOGGER_NAME: {"handlers": ["runnable_handler"], "propagate": False},
     },
 }
+
+
+# styles
+error_style = Style(color="red", bold=True)
+warning_style = Style(color="yellow", bold=True)
+success_style = Style(color="green", bold=True)
+info_style = Style(color="blue", bold=True)
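
The new Style constants pair with the package-level console; entrypoints.execute uses error_style exactly this way when a run raises. A short sketch:

from runnable import console, defaults

console.print("execution failed", style=defaults.error_style)
console.print("execution succeeded", style=defaults.success_style)
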
runnable/entrypoints.py CHANGED
@@ -5,10 +5,11 @@ import os
 import sys
 from typing import Optional, cast
 
-from rich import print
+from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
+from rich.table import Column
 
 import runnable.context as context
-from runnable import defaults, graph, utils
+from runnable import console, defaults, graph, utils
 from runnable.defaults import RunnableConfig, ServiceConfig
 
 logger = logging.getLogger(defaults.LOGGER_NAME)
@@ -64,6 +65,8 @@ def prepare_configurations(
 
     configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)
 
+    logger.info(f"Resolved configurations: {configuration}")
+
     # Run log settings, configuration over-rides everything
     run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
     if not run_log_config:
@@ -86,14 +89,6 @@ def prepare_configurations(
     pickler_config = cast(ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER))
     pickler_handler = utils.get_provider_by_name_and_type("pickler", pickler_config)
 
-    # experiment tracker settings, configuration over-rides everything
-    tracker_config: Optional[ServiceConfig] = configuration.get("experiment_tracker", None)
-    if not tracker_config:
-        tracker_config = cast(
-            ServiceConfig, runnable_defaults.get("experiment_tracker", defaults.DEFAULT_EXPERIMENT_TRACKER)
-        )
-    tracker_handler = utils.get_provider_by_name_and_type("experiment_tracker", tracker_config)
-
     # executor configurations, configuration over rides everything
     executor_config: Optional[ServiceConfig] = configuration.get("executor", None)
     if force_local_executor:
@@ -110,7 +105,6 @@ def prepare_configurations(
         catalog_handler=catalog_handler,
         secrets_handler=secrets_handler,
         pickler=pickler_handler,
-        experiment_tracker=tracker_handler,
         variables=variables,
         tag=tag,
         run_id=run_id,
@@ -176,8 +170,8 @@ def execute(
         tag=tag,
         parameters_file=parameters_file,
     )
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
 
     executor = run_context.executor
 
@@ -188,8 +182,28 @@ def execute(
     # Prepare for graph execution
     executor.prepare_for_graph_execution()
 
-    logger.info("Executing the graph")
-    executor.execute_graph(dag=run_context.dag)  # type: ignore
+    logger.info(f"Executing the graph: {run_context.dag}")
+    with Progress(
+        TextColumn("[progress.description]{task.description}", table_column=Column(ratio=2)),
+        BarColumn(table_column=Column(ratio=1), style="dark_orange"),
+        TimeElapsedColumn(table_column=Column(ratio=1)),
+        console=console,
+        expand=True,
+    ) as progress:
+        pipeline_execution_task = progress.add_task("[dark_orange] Starting execution .. ", total=1)
+        try:
+            run_context.progress = progress
+            executor.execute_graph(dag=run_context.dag)  # type: ignore
+
+            run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False)
+
+            if run_log.status == defaults.SUCCESS:
+                progress.update(pipeline_execution_task, description="[green] Success", completed=True)
+            else:
+                progress.update(pipeline_execution_task, description="[red] Failed", completed=True)
+        except Exception as e:  # noqa: E722
+            console.print(e, style=defaults.error_style)
+            progress.update(pipeline_execution_task, description="[red] Errored execution", completed=True)
 
     executor.send_return_code()
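
Stripped of the executor wiring, the hunk above reduces to the progress pattern sketched below; the column layout and labels are taken from the diff, while run_pipeline is a hypothetical stand-in for executor.execute_graph:

from rich.console import Console
from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
from rich.table import Column

console = Console()


def run_pipeline() -> bool:  # hypothetical stand-in for the real graph execution
    return True


with Progress(
    TextColumn("[progress.description]{task.description}", table_column=Column(ratio=2)),
    BarColumn(table_column=Column(ratio=1), style="dark_orange"),
    TimeElapsedColumn(table_column=Column(ratio=1)),
    console=console,
    expand=True,
) as progress:
    task = progress.add_task("[dark_orange] Starting execution .. ", total=1)
    try:
        succeeded = run_pipeline()
        label = "[green] Success" if succeeded else "[red] Failed"
        progress.update(task, description=label, completed=True)
    except Exception as e:
        console.print(e)
        progress.update(task, description="[red] Errored execution", completed=True)
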
 
@@ -227,8 +241,8 @@ def execute_single_node(
         tag=tag,
         parameters_file=parameters_file,
     )
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
 
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -280,8 +294,8 @@ def execute_notebook(
     run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
     utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
 
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
 
     step_config = {
         "command": notebook_file,
@@ -342,8 +356,8 @@ def execute_function(
     run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
     utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
 
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
 
     # Prepare the graph with a single node
     step_config = {
@@ -411,8 +425,8 @@ def fan(
         tag=tag,
         parameters_file=parameters_file,
     )
-    print("Working with context:")
-    print(run_context)
+    console.print("Working with context:")
+    console.print(run_context)
 
     executor = run_context.executor
     run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -437,4 +451,4 @@
 
 if __name__ == "__main__":
     # This is only for perf testing purposes.
-    prepare_configurations(run_id="abc", pipeline_file="example/mocking.yaml")
+    prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")
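
As the __main__ block hints, prepare_configurations can be exercised on its own for profiling. A sketch, assuming a checkout that contains examples/mocking.yaml and that the function returns the assembled Context (an assumption of this sketch):

from runnable.entrypoints import prepare_configurations

# Builds the run context (executor, catalog, run log store, ...) without
# executing anything.
run_context = prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")
print(run_context.run_id)  # "abc"
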
runnable/exceptions.py CHANGED
@@ -92,3 +92,7 @@ class ExecutionFailedError(Exception):  # pragma: no cover
     def __init__(self, run_id: str):
         super().__init__()
         self.message = f"Execution failed for run id: {run_id}"
+
+
+class CommandCallError(Exception):  # pragma: no cover
+    "An exception during the call of the command"
@@ -2,7 +2,7 @@ import logging
 import os
 import shutil
 from pathlib import Path
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 
 from runnable import defaults, utils
 from runnable.catalog import BaseCatalog
@@ -34,6 +34,13 @@ class FileSystemCatalog(BaseCatalog):
     def get_catalog_location(self):
         return self.catalog_location
 
+    def get_summary(self) -> Dict[str, Any]:
+        summary = {
+            "Catalog Location": self.get_catalog_location(),
+        }
+
+        return summary
+
     def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
         """
         Get the file by matching glob pattern to the name