runnable 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
runnable/tasks.py CHANGED
@@ -1,4 +1,3 @@
1
- import ast
2
1
  import contextlib
3
2
  import importlib
4
3
  import io
@@ -7,7 +6,7 @@ import logging
7
6
  import os
8
7
  import subprocess
9
8
  import sys
10
- from typing import Any, Dict, Tuple
9
+ from typing import Any, Dict, List, Optional, Tuple
11
10
 
12
11
  from pydantic import BaseModel, ConfigDict, Field, ValidationInfo, field_validator
13
12
  from pydantic._internal._model_construction import ModelMetaclass
@@ -29,6 +28,7 @@ class BaseTaskType(BaseModel):
29
28
 
30
29
  task_type: str = Field(serialization_alias="command_type")
31
30
  node_name: str = Field(exclude=True)
31
+ secrets: Dict[str, str] = Field(default_factory=dict)
32
32
 
33
33
  model_config = ConfigDict(extra="forbid")
34
34
 
@@ -50,20 +50,23 @@ class BaseTaskType(BaseModel):
50
50
  """
51
51
  raise NotImplementedError()
52
52
 
53
- def _get_parameters(self, map_variable: TypeMapVariable = None, **kwargs) -> Dict[str, Any]:
54
- """
55
- By this step, all the parameters are present as environment variables as json strings.
56
- Return the parameters in scope for the execution.
57
-
58
- Args:
59
- map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
60
-
61
- Returns:
62
- dict: The parameters dictionary in-scope for the task execution
63
- """
64
- return parameters.get_user_set_parameters(remove=False)
65
-
66
- def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
53
+ def set_secrets_as_env_variables(self):
54
+ for key, value in self.secrets.items():
55
+ secret_value = context.run_context.secrets_handler.get(key)
56
+ self.secrets[value] = secret_value
57
+ os.environ[value] = secret_value
58
+
59
+ def delete_secrets_from_env_variables(self):
60
+ for _, value in self.secrets.items():
61
+ if value in os.environ:
62
+ del os.environ[value]
63
+
64
+ def execute_command(
65
+ self,
66
+ params: Optional[Dict[str, Any]] = None,
67
+ map_variable: TypeMapVariable = None,
68
+ **kwargs,
69
+ ) -> Optional[Dict[str, Any]]:
67
70
  """The function to execute the command.
68
71
 
69
72
  And map_variable is sent in as an argument into the function.
@@ -76,20 +79,19 @@ class BaseTaskType(BaseModel):
76
79
  """
77
80
  raise NotImplementedError()
78
81
 
79
- def _set_parameters(self, params: BaseModel, **kwargs):
80
- """Set the parameters back to the environment variables.
82
+ @contextlib.contextmanager
83
+ def expose_secrets(self, map_variable: TypeMapVariable = None):
84
+ """Context manager to expose secrets to the execution.
81
85
 
82
86
  Args:
83
- parameters (dict, optional): The parameters to set back as env variables. Defaults to None.
84
- """
85
- # Nothing to do
86
- if not params:
87
- return
88
-
89
- if not isinstance(params, BaseModel) or isinstance(params, ModelMetaclass):
90
- raise ValueError("Output variable of a function can only be a pydantic model or dynamic model.")
87
+ map_variable (dict, optional): If the command is part of map node, the value of map. Defaults to None.
91
88
 
92
- parameters.set_user_defined_params_as_environment_variables(params.model_dump(by_alias=True))
89
+ """
90
+ self.set_secrets_as_env_variables()
91
+ try:
92
+ yield
93
+ finally:
94
+ self.delete_secrets_from_env_variables()
93
95
 
94
96
  @contextlib.contextmanager
95
97
  def output_to_file(self, map_variable: TypeMapVariable = None):
@@ -122,32 +124,12 @@ class BaseTaskType(BaseModel):
122
124
  os.remove(log_file.name)
123
125
 
124
126
 
125
- class EasyModel(BaseModel):
126
- model_config = ConfigDict(extra="allow")
127
-
128
-
129
- def make_pydantic_model(
130
- variables: Dict[str, Any],
131
- prefix: str = "",
132
- ) -> BaseModel:
133
- prefix_removed = {utils.remove_prefix(k, prefix): v for k, v in variables.items()}
134
- return EasyModel(**prefix_removed)
135
-
136
-
137
127
  class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
138
128
  """The task class for python command."""
139
129
 
140
130
  task_type: str = Field(default="python", serialization_alias="command_type")
141
131
  command: str
142
132
 
143
- @field_validator("command")
144
- @classmethod
145
- def validate_command(cls, command: str):
146
- if not command:
147
- raise Exception("Command cannot be empty for shell task")
148
-
149
- return command
150
-
151
133
  def get_cli_options(self) -> Tuple[str, dict]:
152
134
  """Return the cli options for the task.
153
135
 
@@ -156,22 +138,26 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
156
138
  """
157
139
  return "function", {"command": self.command}
158
140
 
159
- def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
141
+ def execute_command(
142
+ self,
143
+ params: Optional[Dict[str, Any]] = None,
144
+ map_variable: TypeMapVariable = None,
145
+ **kwargs,
146
+ ) -> Optional[Dict[str, Any]]:
160
147
  """Execute the notebook as defined by the command."""
148
+ if not params:
149
+ params = {}
150
+
161
151
  module, func = utils.get_module_and_attr_names(self.command)
162
152
  sys.path.insert(0, os.getcwd()) # Need to add the current directory to path
163
153
  imported_module = importlib.import_module(module)
164
154
  f = getattr(imported_module, func)
165
155
 
166
- params = self._get_parameters()
167
156
  filtered_parameters = parameters.filter_arguments_for_func(f, params, map_variable)
168
157
 
169
- if map_variable:
170
- os.environ[defaults.MAP_VARIABLE] = json.dumps(map_variable)
171
-
172
158
  logger.info(f"Calling {func} from {module} with {filtered_parameters}")
173
159
 
174
- with self.output_to_file(map_variable=map_variable) as _:
160
+ with self.output_to_file(map_variable=map_variable) as _, self.expose_secrets() as _:
175
161
  try:
176
162
  user_set_parameters = f(**filtered_parameters)
177
163
  except Exception as _e:
@@ -180,10 +166,13 @@ class PythonTaskType(BaseTaskType): # pylint: disable=too-few-public-methods
180
166
  logger.exception(_e)
181
167
  raise
182
168
 
183
- if map_variable:
184
- del os.environ[defaults.MAP_VARIABLE]
169
+ if user_set_parameters is None:
170
+ return {}
171
+
172
+ if not isinstance(user_set_parameters, BaseModel) or isinstance(user_set_parameters, ModelMetaclass):
173
+ raise ValueError("Output variable of a function can only be a pydantic model or dynamic model.")
185
174
 
186
- self._set_parameters(user_set_parameters)
175
+ return user_set_parameters.model_dump(by_alias=True)
187
176
 
188
177
 
189
178
  class NotebookTaskType(BaseTaskType):
@@ -192,17 +181,13 @@ class NotebookTaskType(BaseTaskType):
192
181
  task_type: str = Field(default="notebook", serialization_alias="command_type")
193
182
  command: str
194
183
  notebook_output_path: str = Field(default="", validate_default=True)
195
- output_cell_tag: str = Field(default="magnus_output", validate_default=True)
184
+ returns: Optional[List[str]] = Field(default_factory=list)
185
+ output_cell_tag: str = Field(default="runnable_output", validate_default=True)
196
186
  optional_ploomber_args: dict = {}
197
187
 
198
- _output_tag: str = "magnus_output"
199
-
200
188
  @field_validator("command")
201
189
  @classmethod
202
190
  def notebook_should_end_with_ipynb(cls, command: str):
203
- if not command:
204
- raise Exception("Command should point to the ipynb file")
205
-
206
191
  if not command.endswith(".ipynb"):
207
192
  raise Exception("Notebook task should point to a ipynb file")
208
193
 
@@ -220,24 +205,12 @@ class NotebookTaskType(BaseTaskType):
220
205
  def get_cli_options(self) -> Tuple[str, dict]:
221
206
  return "notebook", {"command": self.command, "notebook-output-path": self.notebook_output_path}
222
207
 
223
- def _parse_notebook_for_output(self, notebook: Any):
224
- collected_params = {}
225
-
226
- for cell in notebook.cells:
227
- d = cell.dict()
228
- # identify the tags attached to the cell.
229
- tags = d.get("metadata", {}).get("tags", {})
230
- if self.output_cell_tag in tags:
231
- # There is a tag that has output
232
- outputs = d["outputs"]
233
-
234
- for out in outputs:
235
- params = out.get("text", "{}")
236
- collected_params.update(ast.literal_eval(params))
237
-
238
- return collected_params
239
-
240
- def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
208
+ def execute_command(
209
+ self,
210
+ params: Optional[Dict[str, Any]] = None,
211
+ map_variable: TypeMapVariable = None,
212
+ **kwargs,
213
+ ) -> Optional[Dict[str, Any]]:
241
214
  """Execute the python notebook as defined by the command.
242
215
 
243
216
  Args:
@@ -247,44 +220,51 @@ class NotebookTaskType(BaseTaskType):
247
220
  ImportError: If necessary dependencies are not installed
248
221
  Exception: If anything else fails
249
222
  """
223
+ if not params:
224
+ params = {}
225
+
250
226
  try:
251
227
  import ploomber_engine as pm
228
+ from ploomber_engine.ipython import PloomberClient
252
229
 
253
230
  from runnable import put_in_catalog # Causes issues with cyclic import
254
231
 
255
- parameters = self._get_parameters()
256
- filtered_parameters = parameters
257
-
258
232
  notebook_output_path = self.notebook_output_path
259
233
 
260
234
  if map_variable:
261
- os.environ[defaults.MAP_VARIABLE] = json.dumps(map_variable)
262
-
263
- for _, value in map_variable.items():
235
+ for key, value in map_variable.items():
264
236
  notebook_output_path += "_" + str(value)
265
237
 
238
+ params[key] = value
239
+
266
240
  ploomber_optional_args = self.optional_ploomber_args
267
241
 
268
242
  kwds = {
269
243
  "input_path": self.command,
270
244
  "output_path": notebook_output_path,
271
- "parameters": filtered_parameters,
245
+ "parameters": params,
272
246
  "log_output": True,
273
247
  "progress_bar": False,
274
248
  }
275
249
  kwds.update(ploomber_optional_args)
276
250
 
277
251
  collected_params: Dict[str, Any] = {}
278
- with self.output_to_file(map_variable=map_variable) as _:
279
- out = pm.execute_notebook(**kwds)
280
- collected_params = self._parse_notebook_for_output(out)
281
-
282
- collected_params_model = make_pydantic_model(collected_params)
283
- self._set_parameters(collected_params_model)
252
+ with self.output_to_file(map_variable=map_variable) as _, self.expose_secrets() as _:
253
+ pm.execute_notebook(**kwds)
284
254
 
285
255
  put_in_catalog(notebook_output_path)
286
- if map_variable:
287
- del os.environ[defaults.MAP_VARIABLE]
256
+
257
+ client = PloomberClient.from_path(path=notebook_output_path)
258
+ namespace = client.get_namespace()
259
+
260
+ for key, value in namespace.items():
261
+ if key in (self.returns or []):
262
+ if isinstance(value, BaseModel):
263
+ collected_params[key] = value.model_dump(by_alias=True)
264
+ continue
265
+ collected_params[key] = value
266
+
267
+ return collected_params
288
268
 
289
269
  except ImportError as e:
290
270
  msg = (
@@ -299,32 +279,55 @@ class ShellTaskType(BaseTaskType):
299
279
  """
300
280
 
301
281
  task_type: str = Field(default="shell", serialization_alias="command_type")
282
+ returns: Optional[List[str]] = Field(default_factory=list)
302
283
  command: str
303
284
 
304
- @field_validator("command")
305
- @classmethod
306
- def validate_command(cls, command: str):
307
- if not command:
308
- raise Exception("Command cannot be empty for shell task")
309
-
310
- return command
311
-
312
- def execute_command(self, map_variable: TypeMapVariable = None, **kwargs):
285
+ def execute_command(
286
+ self,
287
+ params: Optional[Dict[str, Any]] = None,
288
+ map_variable: TypeMapVariable = None,
289
+ **kwargs,
290
+ ) -> Optional[Dict[str, Any]]:
313
291
  # Using shell=True as we want to have chained commands to be executed in the same shell.
314
292
  """Execute the shell command as defined by the command.
315
293
 
316
294
  Args:
317
295
  map_variable (dict, optional): If the node is part of an internal branch. Defaults to None.
318
296
  """
319
- subprocess_env = os.environ.copy()
297
+ if not params:
298
+ params = {}
299
+
300
+ runnable_env_vars: Dict[str, Any] = {}
320
301
 
302
+ # Expose RUNNABLE environment variables, ignoring the parameters, to be passed to the subprocess.
303
+ for key, value in os.environ.items():
304
+ if key.startswith("RUNNABLE_") and not key.startswith("RUNNABLE_PRM_"):
305
+ runnable_env_vars[key] = value
306
+
307
+ subprocess_env = {**params, **runnable_env_vars}
308
+
309
+ # Expose map variable as environment variables
321
310
  if map_variable:
322
- subprocess_env[defaults.MAP_VARIABLE] = json.dumps(map_variable)
311
+ for key, value in map_variable.items(): # type: ignore
312
+ subprocess_env[key] = str(value)
313
+
314
+ # Expose secrets as environment variables
315
+ if self.secrets:
316
+ for key, value in self.secrets.items():
317
+ secret_value = context.run_context.secrets_handler.get(key)
318
+ subprocess_env[value] = secret_value
319
+
320
+ # Json dumps all runnable environment variables
321
+ for key, value in subprocess_env.items():
322
+ subprocess_env[key] = json.dumps(value)
323
323
 
324
- command = self.command.strip() + " && env | grep MAGNUS"
324
+ collect_delimiter = "=== COLLECT ==="
325
+
326
+ command = self.command.strip() + f" && echo '{collect_delimiter}' && env"
325
327
  logger.info(f"Executing shell command: {command}")
326
328
 
327
329
  output_parameters = {}
330
+ capture = False
328
331
 
329
332
  with subprocess.Popen(
330
333
  command,
@@ -338,27 +341,27 @@ class ShellTaskType(BaseTaskType):
338
341
  logger.info(line)
339
342
  print(line)
340
343
 
341
- if line.startswith(defaults.PARAMETER_PREFIX):
344
+ if line.strip() == collect_delimiter:
345
+ capture = True
346
+ continue
347
+
348
+ if capture:
342
349
  key, value = line.strip().split("=", 1)
343
- try:
344
- output_parameters[key] = json.loads(value)
345
- except json.JSONDecodeError:
346
- output_parameters[key] = value # simple data types
350
+ if key in (self.returns or []):
351
+ try:
352
+ output_parameters[key] = json.loads(value)
353
+ except json.JSONDecodeError:
354
+ output_parameters[key] = value # simple data types
347
355
 
348
- if line.startswith(defaults.TRACK_PREFIX):
349
- key, value = line.split("=", 1)
350
- os.environ[key] = value.strip()
356
+ # if line.startswith(defaults.TRACK_PREFIX):
357
+ # key, value = line.split("=", 1)
358
+ # os.environ[key] = value.strip()
351
359
 
352
360
  proc.wait()
353
361
  if proc.returncode != 0:
354
362
  raise Exception("Command failed")
355
363
 
356
- self._set_parameters(
357
- params=make_pydantic_model(
358
- output_parameters,
359
- defaults.PARAMETER_PREFIX,
360
- )
361
- )
364
+ return output_parameters
362
365
 
363
366
 
364
367
  def create_task(kwargs_for_init) -> BaseTaskType:
runnable/utils.py CHANGED
@@ -281,10 +281,10 @@ def get_git_code_identity():
281
281
  """Returns a code identity object for version controlled code.
282
282
 
283
283
  Args:
284
- run_log_store (magnus.datastore.BaseRunLogStore): The run log store used in this process
284
+ run_log_store (runnable.datastore.BaseRunLogStore): The run log store used in this process
285
285
 
286
286
  Returns:
287
- magnus.datastore.CodeIdentity: The code identity used by the run log store.
287
+ runnable.datastore.CodeIdentity: The code identity used by the run log store.
288
288
  """
289
289
  code_identity = context.run_context.run_log_store.create_code_identity()
290
290
  try:
@@ -316,7 +316,7 @@ def remove_prefix(text: str, prefix: str) -> str:
316
316
 
317
317
 
318
318
  def get_tracked_data() -> Dict[str, str]:
319
- """Scans the environment variables to find any user tracked variables that have a prefix MAGNUS_TRACK_
319
+ """Scans the environment variables to find any user tracked variables that have a prefix runnable_TRACK_
320
320
  Removes the environment variable to prevent any clashes in the future steps.
321
321
 
322
322
  Returns:
@@ -412,7 +412,7 @@ def get_node_execution_command(
412
412
 
413
413
  log_level = logging.getLevelName(logger.getEffectiveLevel())
414
414
 
415
- action = f"magnus execute_single_node {run_id} " f"{node._command_friendly_name()}" f" --log-level {log_level}"
415
+ action = f"runnable execute_single_node {run_id} " f"{node._command_friendly_name()}" f" --log-level {log_level}"
416
416
 
417
417
  if context.run_context.pipeline_file:
418
418
  action = action + f" --file {context.run_context.pipeline_file}"
@@ -453,7 +453,7 @@ def get_fan_command(
453
453
  """
454
454
  log_level = logging.getLevelName(logger.getEffectiveLevel())
455
455
  action = (
456
- f"magnus fan {run_id} "
456
+ f"runnable fan {run_id} "
457
457
  f"{node._command_friendly_name()} "
458
458
  f"--mode {mode} "
459
459
  f"--file {context.run_context.pipeline_file} "
@@ -497,7 +497,7 @@ def get_job_execution_command(node: TaskNode, over_write_run_id: str = "") -> st
497
497
 
498
498
  cli_command, cli_options = node.executable.get_cli_options()
499
499
 
500
- action = f"magnus execute_{cli_command} {run_id} " f" --log-level {log_level}"
500
+ action = f"runnable execute_{cli_command} {run_id} " f" --log-level {log_level}"
501
501
 
502
502
  action = action + f" --entrypoint {defaults.ENTRYPOINT.SYSTEM.value}"
503
503
 
@@ -595,8 +595,8 @@ def json_to_ordered_dict(json_str: str) -> TypeMapVariable:
595
595
  return OrderedDict()
596
596
 
597
597
 
598
- def set_magnus_environment_variables(run_id: str = "", configuration_file: str = "", tag: str = "") -> None:
599
- """Set the environment variables used by magnus. This function should be called during the prepare configurations
598
+ def set_runnable_environment_variables(run_id: str = "", configuration_file: str = "", tag: str = "") -> None:
599
+ """Set the environment variables used by runnable. This function should be called during the prepare configurations
600
600
  by all executors.
601
601
 
602
602
  Args:
@@ -608,14 +608,14 @@ def set_magnus_environment_variables(run_id: str = "", configuration_file: str =
608
608
  os.environ[defaults.ENV_RUN_ID] = run_id
609
609
 
610
610
  if configuration_file:
611
- os.environ[defaults.MAGNUS_CONFIG_FILE] = configuration_file
611
+ os.environ[defaults.RUNNABLE_CONFIG_FILE] = configuration_file
612
612
 
613
613
  if tag:
614
- os.environ[defaults.MAGNUS_RUN_TAG] = tag
614
+ os.environ[defaults.RUNNABLE_RUN_TAG] = tag
615
615
 
616
616
 
617
617
  def gather_variables() -> dict:
618
- """Gather all the environment variables used by magnus. All the variables start with MAGNUS_VAR_.
618
+ """Gather all the environment variables used by runnable. All the variables start with runnable_VAR_.
619
619
 
620
620
  Returns:
621
621
  dict: All the environment variables present in the environment.