runnable 0.9.1__py3-none-any.whl → 0.11.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
runnable/__init__.py CHANGED
@@ -4,26 +4,30 @@
4
4
  import logging
5
5
  from logging.config import dictConfig
6
6
 
7
+ from rich.console import Console
8
+
7
9
  from runnable import defaults
8
10
 
9
11
  dictConfig(defaults.LOGGING_CONFIG)
10
12
  logger = logging.getLogger(defaults.LOGGER_NAME)
11
13
 
14
+ console = Console()
15
+ console.print(":runner: Lets go!!")
12
16
 
13
- from runnable.sdk import (
14
- Stub,
15
- Pipeline,
16
- Parallel,
17
- Map,
17
+ from runnable.sdk import ( # noqa
18
18
  Catalog,
19
- Success,
20
19
  Fail,
21
- PythonTask,
20
+ Map,
22
21
  NotebookTask,
22
+ Parallel,
23
+ Pipeline,
24
+ PythonTask,
23
25
  ShellTask,
26
+ Stub,
27
+ Success,
28
+ metric,
24
29
  pickled,
25
- ) # noqa
26
-
30
+ )
27
31
 
28
32
  # TODO: Think of model registry as a central place to store models.
29
33
  # TODO: Implement Sagemaker pipelines as a executor.
runnable/catalog.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
  from abc import ABC, abstractmethod
3
- from typing import List, Optional
3
+ from typing import Any, Dict, List, Optional
4
4
 
5
5
  from pydantic import BaseModel, ConfigDict
6
6
 
@@ -25,6 +25,10 @@ class BaseCatalog(ABC, BaseModel):
25
25
  service_type: str = "catalog"
26
26
  model_config = ConfigDict(extra="forbid")
27
27
 
28
+ @abstractmethod
29
+ def get_summary(self) -> Dict[str, Any]:
30
+ ...
31
+
28
32
  @property
29
33
  def _context(self):
30
34
  return context.run_context
@@ -112,6 +116,9 @@ class DoNothingCatalog(BaseCatalog):
112
116
 
113
117
  service_name: str = "do-nothing"
114
118
 
119
+ def get_summary(self) -> Dict[str, Any]:
120
+ return {}
121
+
115
122
  def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
116
123
  """
117
124
  Does nothing
runnable/cli.py CHANGED
@@ -60,6 +60,7 @@ def execute(file, config_file, parameters_file, log_level, tag, run_id): # prag
60
60
  provided
61
61
  """
62
62
  logger.setLevel(log_level)
63
+
63
64
  entrypoints.execute(
64
65
  configuration_file=config_file,
65
66
  pipeline_file=file,
runnable/context.py CHANGED
@@ -1,11 +1,11 @@
1
1
  from typing import Dict, Optional
2
2
 
3
- from pydantic import BaseModel, SerializeAsAny
3
+ from pydantic import BaseModel, ConfigDict, Field, SerializeAsAny
4
+ from rich.progress import Progress
4
5
 
5
6
  from runnable.catalog import BaseCatalog
6
7
  from runnable.datastore import BaseRunLogStore
7
8
  from runnable.executor import BaseExecutor
8
- from runnable.experiment_tracker import BaseExperimentTracker
9
9
  from runnable.graph import Graph
10
10
  from runnable.pickler import BasePickler
11
11
  from runnable.secrets import BaseSecrets
@@ -16,8 +16,10 @@ class Context(BaseModel):
16
16
  run_log_store: SerializeAsAny[BaseRunLogStore]
17
17
  secrets_handler: SerializeAsAny[BaseSecrets]
18
18
  catalog_handler: SerializeAsAny[BaseCatalog]
19
- experiment_tracker: SerializeAsAny[BaseExperimentTracker]
20
19
  pickler: SerializeAsAny[BasePickler]
20
+ progress: SerializeAsAny[Optional[Progress]] = Field(default=None, exclude=True)
21
+
22
+ model_config = ConfigDict(arbitrary_types_allowed=True)
21
23
 
22
24
  pipeline_file: Optional[str] = ""
23
25
  parameters_file: Optional[str] = ""
runnable/datastore.py CHANGED
@@ -4,23 +4,29 @@ import logging
4
4
  import os
5
5
  from abc import ABC, abstractmethod
6
6
  from datetime import datetime
7
- from typing import Annotated, Any, Dict, List, Literal, Optional, OrderedDict, Tuple, Union
7
+ from typing import (
8
+ Annotated,
9
+ Any,
10
+ Dict,
11
+ List,
12
+ Literal,
13
+ Optional,
14
+ OrderedDict,
15
+ Tuple,
16
+ Union,
17
+ )
8
18
 
9
19
  from pydantic import BaseModel, Field, computed_field
10
- from typing_extensions import TypeAliasType
11
20
 
12
21
  import runnable.context as context
13
22
  from runnable import defaults, exceptions
14
23
 
15
24
  logger = logging.getLogger(defaults.LOGGER_NAME)
16
25
 
17
- # Once defined these classes are sealed to any additions unless a default is provided
18
- # Breaking this rule might make runnable backwardly incompatible
19
26
 
20
- JSONType = TypeAliasType(
21
- "JSONType",
22
- Union[bool, int, float, str, None, List["JSONType"], Dict[str, "JSONType"]], # type: ignore
23
- )
27
+ JSONType = Union[
28
+ str, int, float, bool, List[Any], Dict[str, Any]
29
+ ] # This is actually JSONType, but pydantic doesn't support TypeAlias yet
24
30
 
25
31
 
26
32
  class DataCatalog(BaseModel, extra="allow"):
@@ -62,10 +68,29 @@ The theory behind reduced:
62
68
 
63
69
  class JsonParameter(BaseModel):
64
70
  kind: Literal["json"]
65
- value: JSONType # type: ignore
71
+ value: JSONType
72
+ reduced: bool = True
73
+
74
+ @computed_field # type: ignore
75
+ @property
76
+ def description(self) -> JSONType:
77
+ return self.value
78
+
79
+ def get_value(self) -> JSONType:
80
+ return self.value
81
+
82
+
83
+ class MetricParameter(BaseModel):
84
+ kind: Literal["metric"]
85
+ value: JSONType
66
86
  reduced: bool = True
67
87
 
68
- def get_value(self) -> JSONType: # type: ignore
88
+ @computed_field # type: ignore
89
+ @property
90
+ def description(self) -> JSONType:
91
+ return self.value
92
+
93
+ def get_value(self) -> JSONType:
69
94
  return self.value
70
95
 
71
96
 
@@ -100,7 +125,7 @@ class ObjectParameter(BaseModel):
100
125
  os.remove(self.file_name) # Remove after loading
101
126
 
102
127
 
103
- Parameter = Annotated[Union[JsonParameter, ObjectParameter], Field(discriminator="kind")]
128
+ Parameter = Annotated[Union[JsonParameter, ObjectParameter, MetricParameter], Field(discriminator="kind")]
104
129
 
105
130
 
106
131
  class StepAttempt(BaseModel):
@@ -115,6 +140,7 @@ class StepAttempt(BaseModel):
115
140
  message: str = ""
116
141
  input_parameters: Dict[str, Parameter] = Field(default_factory=dict)
117
142
  output_parameters: Dict[str, Parameter] = Field(default_factory=dict)
143
+ user_defined_metrics: Dict[str, Parameter] = Field(default_factory=dict)
118
144
 
119
145
  @property
120
146
  def duration(self):
@@ -149,10 +175,43 @@ class StepLog(BaseModel):
149
175
  mock: bool = False
150
176
  code_identities: List[CodeIdentity] = Field(default_factory=list)
151
177
  attempts: List[StepAttempt] = Field(default_factory=list)
152
- user_defined_metrics: Dict[str, Any] = Field(default_factory=dict)
153
178
  branches: Dict[str, BranchLog] = Field(default_factory=dict)
154
179
  data_catalog: List[DataCatalog] = Field(default_factory=list)
155
180
 
181
+ def get_summary(self) -> Dict[str, Any]:
182
+ """
183
+ Summarize the step log to log
184
+ """
185
+ summary: Dict[str, Any] = {}
186
+
187
+ summary["Name"] = self.internal_name
188
+ summary["Input catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "get"]
189
+ summary["Available parameters"] = [
190
+ (p, v.description) for attempt in self.attempts for p, v in attempt.input_parameters.items()
191
+ ]
192
+
193
+ summary["Output catalog content"] = [dc.name for dc in self.data_catalog if dc.stage == "put"]
194
+ summary["Output parameters"] = [
195
+ (p, v.description) for attempt in self.attempts for p, v in attempt.output_parameters.items()
196
+ ]
197
+
198
+ summary["Metrics"] = [
199
+ (p, v.description) for attempt in self.attempts for p, v in attempt.user_defined_metrics.items()
200
+ ]
201
+
202
+ cis = []
203
+ for ci in self.code_identities:
204
+ message = f"{ci.code_identifier_type}:{ci.code_identifier}"
205
+ if not ci.code_identifier_dependable:
206
+ message += " but is not dependable"
207
+ cis.append(message)
208
+
209
+ summary["Code identities"] = cis
210
+
211
+ summary["status"] = self.status
212
+
213
+ return summary
214
+
156
215
  def get_data_catalogs_by_stage(self, stage="put") -> List[DataCatalog]:
157
216
  """
158
217
  Given a stage, return the data catalogs according to the stage
@@ -242,6 +301,22 @@ class RunLog(BaseModel):
242
301
  parameters: Dict[str, Parameter] = Field(default_factory=dict)
243
302
  run_config: Dict[str, Any] = Field(default_factory=dict)
244
303
 
304
+ def get_summary(self) -> Dict[str, Any]:
305
+ summary: Dict[str, Any] = {}
306
+
307
+ _context = context.run_context
308
+
309
+ summary["Unique execution id"] = self.run_id
310
+ summary["status"] = self.status
311
+
312
+ summary["Catalog Location"] = _context.catalog_handler.get_summary()
313
+ summary["Full Run log present at: "] = _context.run_log_store.get_summary()
314
+
315
+ summary["Final Parameters"] = {p: v.description for p, v in self.parameters.items()}
316
+ summary["Collected metrics"] = {p: v.description for p, v in self.parameters.items() if v.kind == "metric"}
317
+
318
+ return summary
319
+
245
320
  def get_data_catalogs_by_stage(self, stage: str = "put") -> List[DataCatalog]:
246
321
  """
247
322
  Return all the cataloged data by the stage at which they were cataloged.
@@ -360,6 +435,10 @@ class BaseRunLogStore(ABC, BaseModel):
360
435
  service_name: str = ""
361
436
  service_type: str = "run_log_store"
362
437
 
438
+ @abstractmethod
439
+ def get_summary(self) -> Dict[str, Any]:
440
+ ...
441
+
363
442
  @property
364
443
  def _context(self):
365
444
  return context.run_context
@@ -693,6 +772,11 @@ class BufferRunLogstore(BaseRunLogStore):
693
772
  service_name: str = "buffered"
694
773
  run_log: Optional[RunLog] = Field(default=None, exclude=True) # For a buffered Run Log, this is the database
695
774
 
775
+ def get_summary(self) -> Dict[str, Any]:
776
+ summary = {"Type": self.service_name, "Location": "Not persisted"}
777
+
778
+ return summary
779
+
696
780
  def create_run_log(
697
781
  self,
698
782
  run_id: str,
runnable/defaults.py CHANGED
@@ -1,17 +1,10 @@
1
- # mypy: ignore-errors
2
- # The above should be done until https://github.com/python/mypy/issues/8823
3
1
  from enum import Enum
2
+ from typing import TypedDict # type: ignore[unused-ignore]
4
3
  from typing import Any, Dict, Mapping, Optional, Union
5
4
 
5
+ from rich.style import Style
6
6
  from typing_extensions import TypeAlias
7
7
 
8
- # TODO: This is not the correct way to do this.
9
- try: # pragma: no cover
10
- from typing import TypedDict # type: ignore[unused-ignore]
11
- except ImportError: # pragma: no cover
12
- from typing_extensions import TypedDict # type: ignore[unused-ignore]
13
-
14
-
15
8
  NAME = "runnable"
16
9
  LOGGER_NAME = "runnable"
17
10
 
@@ -182,3 +175,10 @@ LOGGING_CONFIG = {
182
175
  LOGGER_NAME: {"handlers": ["runnable_handler"], "propagate": False},
183
176
  },
184
177
  }
178
+
179
+
180
+ # styles
181
+ error_style = Style(color="red", bold=True)
182
+ warning_style = Style(color="yellow", bold=True)
183
+ success_style = Style(color="green", bold=True)
184
+ info_style = Style(color="blue", bold=True)
runnable/entrypoints.py CHANGED
@@ -5,10 +5,11 @@ import os
5
5
  import sys
6
6
  from typing import Optional, cast
7
7
 
8
- from rich import print
8
+ from rich.progress import BarColumn, Progress, TextColumn, TimeElapsedColumn
9
+ from rich.table import Column
9
10
 
10
11
  import runnable.context as context
11
- from runnable import defaults, graph, utils
12
+ from runnable import console, defaults, graph, utils
12
13
  from runnable.defaults import RunnableConfig, ServiceConfig
13
14
 
14
15
  logger = logging.getLogger(defaults.LOGGER_NAME)
@@ -64,6 +65,8 @@ def prepare_configurations(
64
65
 
65
66
  configuration: RunnableConfig = cast(RunnableConfig, templated_configuration)
66
67
 
68
+ logger.info(f"Resolved configurations: {configuration}")
69
+
67
70
  # Run log settings, configuration over-rides everything
68
71
  run_log_config: Optional[ServiceConfig] = configuration.get("run_log_store", None)
69
72
  if not run_log_config:
@@ -86,14 +89,6 @@ def prepare_configurations(
86
89
  pickler_config = cast(ServiceConfig, runnable_defaults.get("pickler", defaults.DEFAULT_PICKLER))
87
90
  pickler_handler = utils.get_provider_by_name_and_type("pickler", pickler_config)
88
91
 
89
- # experiment tracker settings, configuration over-rides everything
90
- tracker_config: Optional[ServiceConfig] = configuration.get("experiment_tracker", None)
91
- if not tracker_config:
92
- tracker_config = cast(
93
- ServiceConfig, runnable_defaults.get("experiment_tracker", defaults.DEFAULT_EXPERIMENT_TRACKER)
94
- )
95
- tracker_handler = utils.get_provider_by_name_and_type("experiment_tracker", tracker_config)
96
-
97
92
  # executor configurations, configuration over rides everything
98
93
  executor_config: Optional[ServiceConfig] = configuration.get("executor", None)
99
94
  if force_local_executor:
@@ -110,7 +105,6 @@ def prepare_configurations(
110
105
  catalog_handler=catalog_handler,
111
106
  secrets_handler=secrets_handler,
112
107
  pickler=pickler_handler,
113
- experiment_tracker=tracker_handler,
114
108
  variables=variables,
115
109
  tag=tag,
116
110
  run_id=run_id,
@@ -176,8 +170,8 @@ def execute(
176
170
  tag=tag,
177
171
  parameters_file=parameters_file,
178
172
  )
179
- print("Working with context:")
180
- print(run_context)
173
+ console.print("Working with context:")
174
+ console.print(run_context)
181
175
 
182
176
  executor = run_context.executor
183
177
 
@@ -188,8 +182,28 @@ def execute(
188
182
  # Prepare for graph execution
189
183
  executor.prepare_for_graph_execution()
190
184
 
191
- logger.info("Executing the graph")
192
- executor.execute_graph(dag=run_context.dag) # type: ignore
185
+ logger.info(f"Executing the graph: {run_context.dag}")
186
+ with Progress(
187
+ TextColumn("[progress.description]{task.description}", table_column=Column(ratio=2)),
188
+ BarColumn(table_column=Column(ratio=1), style="dark_orange"),
189
+ TimeElapsedColumn(table_column=Column(ratio=1)),
190
+ console=console,
191
+ expand=True,
192
+ ) as progress:
193
+ pipeline_execution_task = progress.add_task("[dark_orange] Starting execution .. ", total=1)
194
+ try:
195
+ run_context.progress = progress
196
+ executor.execute_graph(dag=run_context.dag) # type: ignore
197
+
198
+ run_log = run_context.run_log_store.get_run_log_by_id(run_id=run_context.run_id, full=False)
199
+
200
+ if run_log.status == defaults.SUCCESS:
201
+ progress.update(pipeline_execution_task, description="[green] Success", completed=True)
202
+ else:
203
+ progress.update(pipeline_execution_task, description="[red] Failed", completed=True)
204
+ except Exception as e: # noqa: E722
205
+ console.print(e, style=defaults.error_style)
206
+ progress.update(pipeline_execution_task, description="[red] Errored execution", completed=True)
193
207
 
194
208
  executor.send_return_code()
195
209
 
@@ -227,8 +241,8 @@ def execute_single_node(
227
241
  tag=tag,
228
242
  parameters_file=parameters_file,
229
243
  )
230
- print("Working with context:")
231
- print(run_context)
244
+ console.print("Working with context:")
245
+ console.print(run_context)
232
246
 
233
247
  executor = run_context.executor
234
248
  run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -280,8 +294,8 @@ def execute_notebook(
280
294
  run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
281
295
  utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
282
296
 
283
- print("Working with context:")
284
- print(run_context)
297
+ console.print("Working with context:")
298
+ console.print(run_context)
285
299
 
286
300
  step_config = {
287
301
  "command": notebook_file,
@@ -342,8 +356,8 @@ def execute_function(
342
356
  run_context.execution_plan = defaults.EXECUTION_PLAN.UNCHAINED.value
343
357
  utils.set_runnable_environment_variables(run_id=run_id, configuration_file=configuration_file, tag=tag)
344
358
 
345
- print("Working with context:")
346
- print(run_context)
359
+ console.print("Working with context:")
360
+ console.print(run_context)
347
361
 
348
362
  # Prepare the graph with a single node
349
363
  step_config = {
@@ -411,8 +425,8 @@ def fan(
411
425
  tag=tag,
412
426
  parameters_file=parameters_file,
413
427
  )
414
- print("Working with context:")
415
- print(run_context)
428
+ console.print("Working with context:")
429
+ console.print(run_context)
416
430
 
417
431
  executor = run_context.executor
418
432
  run_context.execution_plan = defaults.EXECUTION_PLAN.CHAINED.value
@@ -437,4 +451,4 @@ def fan(
437
451
 
438
452
  if __name__ == "__main__":
439
453
  # This is only for perf testing purposes.
440
- prepare_configurations(run_id="abc", pipeline_file="example/mocking.yaml")
454
+ prepare_configurations(run_id="abc", pipeline_file="examples/mocking.yaml")
runnable/exceptions.py CHANGED
@@ -92,3 +92,7 @@ class ExecutionFailedError(Exception): # pragma: no cover
92
92
  def __init__(self, run_id: str):
93
93
  super().__init__()
94
94
  self.message = f"Execution failed for run id: {run_id}"
95
+
96
+
97
+ class CommandCallError(Exception): # pragma: no cover
98
+ "An exception during the call of the command"
@@ -2,7 +2,7 @@ import logging
2
2
  import os
3
3
  import shutil
4
4
  from pathlib import Path
5
- from typing import List, Optional
5
+ from typing import Any, Dict, List, Optional
6
6
 
7
7
  from runnable import defaults, utils
8
8
  from runnable.catalog import BaseCatalog
@@ -34,6 +34,13 @@ class FileSystemCatalog(BaseCatalog):
34
34
  def get_catalog_location(self):
35
35
  return self.catalog_location
36
36
 
37
+ def get_summary(self) -> Dict[str, Any]:
38
+ summary = {
39
+ "Catalog Location": self.get_catalog_location(),
40
+ }
41
+
42
+ return summary
43
+
37
44
  def get(self, name: str, run_id: str, compute_data_folder: str = "", **kwargs) -> List[DataCatalog]:
38
45
  """
39
46
  Get the file by matching glob pattern to the name