runnable 0.36.1__tar.gz → 0.38.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {runnable-0.36.1 → runnable-0.38.0}/.gitignore +1 -0
  2. {runnable-0.36.1 → runnable-0.38.0}/PKG-INFO +2 -4
  3. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/any_path.py +13 -2
  4. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/__init__.py +4 -1
  5. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/__init__.py +3 -1
  6. {runnable-0.36.1 → runnable-0.38.0}/pyproject.toml +6 -5
  7. {runnable-0.36.1 → runnable-0.38.0}/runnable/catalog.py +5 -2
  8. {runnable-0.36.1 → runnable-0.38.0}/runnable/context.py +1 -0
  9. {runnable-0.36.1 → runnable-0.38.0}/runnable/graph.py +1 -1
  10. {runnable-0.36.1 → runnable-0.38.0}/runnable/nodes.py +2 -0
  11. runnable-0.38.0/runnable/parameters.py +215 -0
  12. {runnable-0.36.1 → runnable-0.38.0}/runnable/sdk.py +8 -0
  13. runnable-0.36.1/runnable/parameters.py +0 -144
  14. {runnable-0.36.1 → runnable-0.38.0}/LICENSE +0 -0
  15. {runnable-0.36.1 → runnable-0.38.0}/README.md +0 -0
  16. {runnable-0.36.1 → runnable-0.38.0}/extensions/README.md +0 -0
  17. {runnable-0.36.1 → runnable-0.38.0}/extensions/__init__.py +0 -0
  18. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/README.md +0 -0
  19. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/file_system.py +0 -0
  20. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/minio.py +0 -0
  21. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/pyproject.toml +0 -0
  22. {runnable-0.36.1 → runnable-0.38.0}/extensions/catalog/s3.py +0 -0
  23. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/README.md +0 -0
  24. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/emulate.py +0 -0
  25. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/k8s.py +0 -0
  26. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/k8s_job_spec.yaml +0 -0
  27. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/local.py +0 -0
  28. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/local_container.py +0 -0
  29. {runnable-0.36.1 → runnable-0.38.0}/extensions/job_executor/pyproject.toml +0 -0
  30. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/README.md +0 -0
  31. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/__init__.py +0 -0
  32. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/conditional.py +0 -0
  33. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/fail.py +0 -0
  34. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/map.py +0 -0
  35. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/parallel.py +0 -0
  36. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/pyproject.toml +0 -0
  37. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/stub.py +0 -0
  38. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/success.py +0 -0
  39. {runnable-0.36.1 → runnable-0.38.0}/extensions/nodes/task.py +0 -0
  40. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/README.md +0 -0
  41. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/argo.py +0 -0
  42. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/emulate.py +0 -0
  43. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/local.py +0 -0
  44. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/local_container.py +0 -0
  45. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/mocked.py +0 -0
  46. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/pyproject.toml +0 -0
  47. {runnable-0.36.1 → runnable-0.38.0}/extensions/pipeline_executor/retry.py +0 -0
  48. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/README.md +0 -0
  49. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/__init__.py +0 -0
  50. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/any_path.py +0 -0
  51. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/chunked_fs.py +0 -0
  52. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/chunked_minio.py +0 -0
  53. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/db/implementation_FF.py +0 -0
  54. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/db/integration_FF.py +0 -0
  55. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/file_system.py +0 -0
  56. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/generic_chunked.py +0 -0
  57. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/minio.py +0 -0
  58. {runnable-0.36.1 → runnable-0.38.0}/extensions/run_log_store/pyproject.toml +0 -0
  59. {runnable-0.36.1 → runnable-0.38.0}/extensions/secrets/README.md +0 -0
  60. {runnable-0.36.1 → runnable-0.38.0}/extensions/secrets/dotenv.py +0 -0
  61. {runnable-0.36.1 → runnable-0.38.0}/extensions/secrets/pyproject.toml +0 -0
  62. {runnable-0.36.1 → runnable-0.38.0}/runnable/__init__.py +0 -0
  63. {runnable-0.36.1 → runnable-0.38.0}/runnable/cli.py +0 -0
  64. {runnable-0.36.1 → runnable-0.38.0}/runnable/datastore.py +0 -0
  65. {runnable-0.36.1 → runnable-0.38.0}/runnable/defaults.py +0 -0
  66. {runnable-0.36.1 → runnable-0.38.0}/runnable/entrypoints.py +0 -0
  67. {runnable-0.36.1 → runnable-0.38.0}/runnable/exceptions.py +0 -0
  68. {runnable-0.36.1 → runnable-0.38.0}/runnable/executor.py +0 -0
  69. {runnable-0.36.1 → runnable-0.38.0}/runnable/names.py +0 -0
  70. {runnable-0.36.1 → runnable-0.38.0}/runnable/pickler.py +0 -0
  71. {runnable-0.36.1 → runnable-0.38.0}/runnable/secrets.py +0 -0
  72. {runnable-0.36.1 → runnable-0.38.0}/runnable/tasks.py +0 -0
  73. {runnable-0.36.1 → runnable-0.38.0}/runnable/utils.py +0 -0
--- runnable-0.36.1/.gitignore
+++ runnable-0.38.0/.gitignore
@@ -157,3 +157,4 @@ cov.xml
 data/
 
 minikube/
+.pth # For model saving and loading
--- runnable-0.36.1/PKG-INFO
+++ runnable-0.38.0/PKG-INFO
@@ -1,12 +1,10 @@
 Metadata-Version: 2.4
 Name: runnable
-Version: 0.36.1
+Version: 0.38.0
 Summary: Add your description here
 Author-email: "Vammi, Vijay" <vijay.vammi@astrazeneca.com>
 License-File: LICENSE
 Requires-Python: >=3.10
-Requires-Dist: click-plugins>=1.1.1
-Requires-Dist: click<=8.1.3
 Requires-Dist: cloudpathlib>=0.20.0
 Requires-Dist: dill>=0.3.9
 Requires-Dist: pydantic>=2.10.3
@@ -15,7 +13,7 @@ Requires-Dist: rich>=13.9.4
 Requires-Dist: ruamel-yaml>=0.18.6
 Requires-Dist: setuptools>=75.6.0
 Requires-Dist: stevedore>=5.4.0
-Requires-Dist: typer>=0.15.1
+Requires-Dist: typer>=0.17.3
 Provides-Extra: docker
 Requires-Dist: docker>=7.1.0; extra == 'docker'
 Provides-Extra: examples
--- runnable-0.36.1/extensions/catalog/any_path.py
+++ runnable-0.38.0/extensions/catalog/any_path.py
@@ -95,7 +95,10 @@ class AnyPathCatalog(BaseCatalog):
         return data_catalogs
 
     def put(
-        self, name: str, allow_file_not_found_exc: bool = False
+        self,
+        name: str,
+        allow_file_not_found_exc: bool = False,
+        store_copy: bool = True,
     ) -> List[DataCatalog]:
         """
         Put the files matching the glob pattern into the catalog.
@@ -154,7 +157,15 @@ class AnyPathCatalog(BaseCatalog):
             data_catalogs.append(data_catalog)
 
             # TODO: Think about syncing only if the file is changed
-            self.upload_to_catalog(file)
+            if store_copy:
+                logger.debug(
+                    f"Copying file {file} to the catalog location for run_id: {run_id}"
+                )
+                self.upload_to_catalog(file)
+            else:
+                logger.debug(
+                    f"Not copying file {file} to the catalog location for run_id: {run_id}"
+                )
 
         if not data_catalogs and not allow_file_not_found_exc:
             raise Exception(f"Did not find any files matching {name} in {copy_from}")
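The net effect of this hunk: a DataCatalog entry is still recorded for every matched file, but the physical copy into the catalog location becomes opt-out. A minimal standalone sketch of the new control flow, using hypothetical stand-ins for the catalog entry and for upload_to_catalog:

import logging

logger = logging.getLogger("sketch")

def put(files: list[str], run_id: str, store_copy: bool = True) -> list[dict]:
    data_catalogs = []
    for file in files:
        data_catalogs.append({"name": file, "run_id": run_id})  # entry is recorded either way
        if store_copy:
            logger.debug(f"Copying file {file} to the catalog location for run_id: {run_id}")
            # the real implementation calls self.upload_to_catalog(file) here
        else:
            logger.debug(f"Not copying file {file} to the catalog location for run_id: {run_id}")
    return data_catalogs

print(put(["model.pth"], run_id="demo", store_copy=False))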
--- runnable-0.36.1/extensions/job_executor/__init__.py
+++ runnable-0.38.0/extensions/job_executor/__init__.py
@@ -29,6 +29,7 @@ class GenericJobExecutor(BaseJobExecutor):
     @property
     def _context(self):
         assert context.run_context
+        assert isinstance(context.run_context, context.JobContext)
        return context.run_context

    def _get_parameters(self) -> Dict[str, JsonParameter]:
@@ -147,7 +148,9 @@ class GenericJobExecutor(BaseJobExecutor):
         data_catalogs = []
         for name_pattern in catalog_settings:
             data_catalog = self._context.catalog.put(
-                name=name_pattern, allow_file_not_found_exc=allow_file_not_found_exc
+                name=name_pattern,
+                allow_file_not_found_exc=allow_file_not_found_exc,
+                store_copy=self._context.catalog_store_copy,
             )
 
             logger.debug(f"Added data catalog: {data_catalog} to job log")
--- runnable-0.36.1/extensions/pipeline_executor/__init__.py
+++ runnable-0.38.0/extensions/pipeline_executor/__init__.py
@@ -160,7 +160,9 @@ class GenericPipelineExecutor(BasePipelineExecutor):
 
             elif stage == "put":
                 data_catalog = self._context.catalog.put(
-                    name=name_pattern, allow_file_not_found_exc=allow_file_no_found_exc
+                    name=name_pattern,
+                    allow_file_not_found_exc=allow_file_no_found_exc,
+                    store_copy=node_catalog_settings.get("store_copy", True),
                 )
             else:
                 raise Exception(f"Stage {stage} not supported")
--- runnable-0.36.1/pyproject.toml
+++ runnable-0.38.0/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "runnable"
-version = "0.36.1"
+version = "0.38.0"
 description = "Add your description here"
 readme = "README.md"
 authors = [
@@ -8,8 +8,6 @@ authors = [
 ]
 requires-python = ">=3.10"
 dependencies = [
-    "click-plugins>=1.1.1",
-    "click<=8.1.3",
     "pydantic>=2.10.3",
     "ruamel-yaml>=0.18.6",
     "stevedore>=5.4.0",
@@ -17,7 +15,7 @@ dependencies = [
     "dill>=0.3.9",
     "setuptools>=75.6.0",
     "python-dotenv>=1.0.1",
-    "typer>=0.15.1",
+    "typer>=0.17.3",
     "cloudpathlib>=0.20.0",
 ]
 
@@ -58,6 +56,9 @@ docs = [
 release = [
     "python-semantic-release>=9.15.2",
 ]
+examples-torch = [
+    "torch>=2.7.1",
+]
 
 [tool.uv.workspace]
 members = ["extensions/catalog",
@@ -148,7 +149,7 @@ file-system = "extensions.run_log_store.file_system:FileSystemRunLogstore"
 
 # Release configuration
 [tool.semantic_release]
-commit_parser = "angular"
+commit_parser = "conventional"
 major_on_zero = true
 allow_zero_version = true
 tag_format = "{version}"
--- runnable-0.36.1/runnable/catalog.py
+++ runnable-0.38.0/runnable/catalog.py
@@ -57,7 +57,7 @@ class BaseCatalog(ABC, BaseModel):
 
     @abstractmethod
     def put(
-        self, name: str, allow_file_not_found_exc: bool = False
+        self, name: str, allow_file_not_found_exc: bool = False, store_copy: bool = True
     ) -> List[DataCatalog]:
         """
         Put the file by 'name' from the 'compute_data_folder' in the catalog for the run_id.
@@ -120,7 +120,10 @@ class DoNothingCatalog(BaseCatalog):
         return []
 
     def put(
-        self, name: str, allow_file_not_found_exc: bool = False
+        self,
+        name: str,
+        allow_file_not_found_exc: bool = False,
+        store_copy: bool = True,
     ) -> List[DataCatalog]:
         """
         Does nothing
--- runnable-0.36.1/runnable/context.py
+++ runnable-0.38.0/runnable/context.py
@@ -475,6 +475,7 @@ class JobContext(RunnableContext):
         default=None,
         description="Catalog settings to be used for the job.",
     )
+    catalog_store_copy: bool = Field(default=True, alias="catalog_store_copy")
 
     @computed_field  # type: ignore
     @cached_property
--- runnable-0.36.1/runnable/graph.py
+++ runnable-0.38.0/runnable/graph.py
@@ -329,7 +329,7 @@ def create_graph(dag_config: Dict[str, Any], internal_branch_name: str = "") ->
     Returns:
         Graph: The created graph object
     """
-    description: str = dag_config.get("description", None)
+    description: str | None = dag_config.get("description", None)
     start_at: str = cast(
         str, dag_config.get("start_at")
     )  # Let the start_at be relative to the graph
--- runnable-0.36.1/runnable/nodes.py
+++ runnable-0.38.0/runnable/nodes.py
@@ -411,11 +411,13 @@ class TraversalNode(BaseNode):
         return self.overrides.get(executor_type) or ""
 
 
+# Unfortunately, this is defined in 2 places. Look in SDK
 class CatalogStructure(BaseModel):
     model_config = ConfigDict(extra="forbid")  # Need to forbid
 
     get: List[str] = Field(default_factory=list)
     put: List[str] = Field(default_factory=list)
+    store_copy: bool = Field(default=True, alias="store_copy")
 
 
 class ExecutableNode(TraversalNode):
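Because CatalogStructure keeps extra="forbid" (unchanged here), the new store_copy key is accepted while any misspelling of it fails validation instead of being silently dropped. A quick sketch, assuming runnable 0.38.0 is installed and that the model is importable from runnable.nodes as the file list suggests:

from pydantic import ValidationError

from runnable.nodes import CatalogStructure

settings = CatalogStructure(put=["model/*.pth"], store_copy=False)
print(settings.model_dump())  # {'get': [], 'put': ['model/*.pth'], 'store_copy': False}

try:
    CatalogStructure(put=["model/*.pth"], store_copies=False)  # typo in the key
except ValidationError as e:
    print(f"rejected with {e.error_count()} error(s)")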
--- /dev/null
+++ runnable-0.38.0/runnable/parameters.py
@@ -0,0 +1,215 @@
+import argparse
+import inspect
+import json
+import logging
+import os
+from typing import Any, Dict, Type
+
+from pydantic import BaseModel, ConfigDict
+from typing_extensions import Callable
+
+from runnable import defaults
+from runnable.datastore import JsonParameter, ObjectParameter
+from runnable.defaults import MapVariableType
+from runnable.utils import remove_prefix
+
+logger = logging.getLogger(defaults.LOGGER_NAME)
+
+
+def get_user_set_parameters(remove: bool = False) -> Dict[str, JsonParameter]:
+    """
+    Scans the environment variables for any user returned parameters that have a prefix runnable_PRM_.
+
+    This function does not deal with any type conversion of the parameters.
+    It just deserializes the parameters and returns them as a dictionary.
+
+    Args:
+        remove (bool, optional): Flag to remove the parameter if needed. Defaults to False.
+
+    Returns:
+        dict: The dictionary of found user returned parameters
+    """
+    parameters: Dict[str, JsonParameter] = {}
+    for env_var, value in os.environ.items():
+        if env_var.startswith(defaults.PARAMETER_PREFIX):
+            key = remove_prefix(env_var, defaults.PARAMETER_PREFIX)
+            try:
+                parameters[key.lower()] = JsonParameter(
+                    kind="json", value=json.loads(value)
+                )
+            except json.decoder.JSONDecodeError:
+                logger.warning(
+                    f"Parameter {key} could not be JSON decoded, adding the literal value"
+                )
+                parameters[key.lower()] = JsonParameter(kind="json", value=value)
+
+            if remove:
+                del os.environ[env_var]
+    return parameters
+
+
+def return_json_parameters(params: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Returns the parameters as a JSON serializable dictionary.
+
+    Args:
+        params (dict): The parameters to serialize.
+
+    Returns:
+        dict: The JSON serializable dictionary.
+    """
+    return_params = {}
+    for key, value in params.items():
+        if isinstance(value, ObjectParameter):
+            continue
+
+        return_params[key] = value.get_value()
+    return return_params
+
+
+def filter_arguments_for_func(
+    func: Callable[..., Any],
+    params: Dict[str, Any],
+    map_variable: MapVariableType = None,
+) -> Dict[str, Any]:
+    """
+    Inspects the function to be called as part of the pipeline to find the arguments of the function.
+    Matches the function arguments to the parameters available either by static parameters or by up stream steps.
+
+    The function "func" signature could be:
+    - def my_function(arg1: int, arg2: str, arg3: float):
+    - def my_function(arg1: int, arg2: str, arg3: float, **kwargs):
+        in this case, we would need to send in remaining keyword arguments as a dictionary.
+    - def my_function(arg1: int, arg2: str, arg3: float, args: argparse.Namespace):
+        In this case, we need to send the rest of the parameters as attributes of the args object.
+
+    Args:
+        func (Callable): The function to inspect
+        parameters (dict): The parameters available for the run
+
+    Returns:
+        dict: The parameters matching the function signature
+    """
+    function_args = inspect.signature(func).parameters
+
+    # Update parameters with the map variables
+    for key, v in (map_variable or {}).items():
+        params[key] = JsonParameter(kind="json", value=v)
+
+    bound_args = {}
+    missing_required_args: list[str] = []
+    var_keyword_param = None
+    namespace_param = None
+
+    # First pass: Handle regular parameters and identify special parameters
+    for name, value in function_args.items():
+        # Ignore any *args
+        if value.kind == inspect.Parameter.VAR_POSITIONAL:
+            logger.warning(f"Ignoring parameter {name} as it is VAR_POSITIONAL")
+            continue
+
+        # Check for **kwargs parameter
+        if value.kind == inspect.Parameter.VAR_KEYWORD:
+            var_keyword_param = name
+            continue
+
+        # Check for argparse.Namespace parameter
+        if value.annotation == argparse.Namespace:
+            namespace_param = name
+            continue
+
+        # Handle regular parameters
+        if name not in params:
+            if value.default != inspect.Parameter.empty:
+                # Default value is given in the function signature, we can use it
+                bound_args[name] = value.default
+            else:
+                # This is a required parameter that's missing
+                missing_required_args.append(name)
+        else:
+            # We have a parameter of this name, lets bind it
+            param_value = params[name]
+
+            if (
+                inspect.isclass(value.annotation)
+                and issubclass(value.annotation, BaseModel)
+            ) and not isinstance(param_value, ObjectParameter):
+                # Even if the annotation is a pydantic model, it can be passed as an object parameter
+                # We try to cast it as a pydantic model if asked
+                named_param = params[name].get_value()
+
+                if not isinstance(named_param, dict):
+                    # A case where the parameter is a one attribute model
+                    named_param = {name: named_param}
+
+                bound_model = bind_args_for_pydantic_model(
+                    named_param, value.annotation
+                )
+                bound_args[name] = bound_model
+
+            elif value.annotation in [str, int, float, bool] and callable(
+                value.annotation
+            ):
+                # Cast it if its a primitive type. Ensure the type matches the annotation.
+                try:
+                    bound_args[name] = value.annotation(params[name].get_value())
+                except (ValueError, TypeError) as e:
+                    raise ValueError(
+                        f"Cannot cast parameter '{name}' to {value.annotation.__name__}: {e}"
+                    )
+            else:
+                # We do not know type of parameter, we send the value as found
+                bound_args[name] = params[name].get_value()
+
+    # Find extra parameters (parameters in params but not consumed by regular function parameters)
+    consumed_param_names = set(bound_args.keys()) | set(missing_required_args)
+    extra_params = {k: v for k, v in params.items() if k not in consumed_param_names}
+
+    # Second pass: Handle **kwargs and argparse.Namespace parameters
+    if var_keyword_param is not None:
+        # Function accepts **kwargs - add all extra parameters directly to bound_args
+        for param_name, param_value in extra_params.items():
+            bound_args[param_name] = param_value.get_value()
+    elif namespace_param is not None:
+        # Function accepts argparse.Namespace - create namespace with extra parameters
+        args_namespace = argparse.Namespace()
+        for param_name, param_value in extra_params.items():
+            setattr(args_namespace, param_name, param_value.get_value())
+        bound_args[namespace_param] = args_namespace
+    elif extra_params:
+        # Function doesn't accept **kwargs or namespace, but we have extra parameters
+        # This should only be an error if we also have missing required parameters
+        # or if the function truly can't handle the extra parameters
+        if missing_required_args:
+            # We have both missing required and extra parameters - this is an error
+            raise ValueError(
+                f"Function {func.__name__} has parameters {missing_required_args} that are not present in the parameters"
+            )
+        # If we only have extra parameters and no missing required ones, we just ignore the extras
+        # This allows for more flexible parameter passing
+
+    # Check for missing required parameters
+    if missing_required_args:
+        if var_keyword_param is None and namespace_param is None:
+            # No way to handle missing parameters
+            raise ValueError(
+                f"Function {func.__name__} has parameters {missing_required_args} that are not present in the parameters"
+            )
+        # If we have **kwargs or namespace, missing parameters might be handled there
+        # But if they're truly required (no default), we should still error
+        raise ValueError(
+            f"Function {func.__name__} has parameters {missing_required_args} that are not present in the parameters"
+        )
+
+    return bound_args
+
+
+def bind_args_for_pydantic_model(
+    params: Dict[str, Any], model: Type[BaseModel]
+) -> BaseModel:
+    class EasyModel(model):  # type: ignore
+        model_config = ConfigDict(extra="ignore")
+
+    swallow_all = EasyModel(**params)
+    bound_model = model(**swallow_all.model_dump())
+    return bound_model
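The docstring above names three supported signatures, and the rewrite binds declared arguments first (with primitive casting) before routing leftovers, unlike the 0.36.1 version below, which returned every parameter unprocessed as soon as **kwargs appeared. A small sketch of how each signature binds, assuming runnable 0.38.0 is installed (JsonParameter and filter_arguments_for_func are both part of this diff):

import argparse

from runnable.datastore import JsonParameter
from runnable.parameters import filter_arguments_for_func

params = {
    "x": JsonParameter(kind="json", value=1),
    "y": JsonParameter(kind="json", value="hello"),
    "extra": JsonParameter(kind="json", value=[1, 2, 3]),
}

def plain(x: int, y: str): ...                         # unmatched params are ignored
def catch_all(x: int, **kwargs): ...                   # unmatched params land in **kwargs
def namespaced(x: int, args: argparse.Namespace): ...  # unmatched params become attributes of args

print(filter_arguments_for_func(plain, dict(params)))      # {'x': 1, 'y': 'hello'}
print(filter_arguments_for_func(catch_all, dict(params)))  # also carries 'extra': [1, 2, 3]
ns = filter_arguments_for_func(namespaced, dict(params))["args"]
print(ns.y, ns.extra)                                      # hello [1, 2, 3]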
--- runnable-0.36.1/runnable/sdk.py
+++ runnable-0.38.0/runnable/sdk.py
@@ -60,6 +60,7 @@ class Catalog(BaseModel):
     Attributes:
         get (List[str]): List of glob patterns to get from central catalog to the compute data folder.
         put (List[str]): List of glob patterns to put into central catalog from the compute data folder.
+        store_copy (bool): Whether to store a copy of the data in the central catalog.
 
     Examples:
         >>> from runnable import Catalog
@@ -74,6 +75,7 @@ class Catalog(BaseModel):
     # compute_data_folder: str = Field(default="", alias="compute_data_folder")
     get: List[str] = Field(default_factory=list, alias="get")
     put: List[str] = Field(default_factory=list, alias="put")
+    store_copy: bool = Field(default=True, alias="store_copy")
 
 
 class BaseTraversal(ABC, BaseModel):
@@ -845,6 +847,11 @@ class BaseJob(BaseModel):
             return []
         return self.catalog.put
 
+    def return_bool_catalog_store_copy(self) -> bool:
+        if self.catalog is None:
+            return True
+        return self.catalog.store_copy
+
     def _is_called_for_definition(self) -> bool:
         """
         If the run context is set, we are coming in only to get the pipeline definition.
@@ -888,6 +895,7 @@ class BaseJob(BaseModel):
         }
 
         run_context = context.JobContext.model_validate(configurations)
+        run_context.catalog_store_copy = self.return_bool_catalog_store_copy()
 
         assert isinstance(run_context.job_executor, BaseJobExecutor)
 
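At the SDK surface the whole feature is a one-line opt-out: Catalog.store_copy defaults to True (preserving the old copy-always behavior), and BaseJob threads it into JobContext.catalog_store_copy via return_bool_catalog_store_copy(). A hedged sketch, using only the import shown in the docstring's Examples block above:

from runnable import Catalog

# Track .pth model files in the run log without duplicating the bytes
# into the central catalog location.
catalog = Catalog(put=["*.pth"], store_copy=False)
print(catalog.model_dump())  # {'get': [], 'put': ['*.pth'], 'store_copy': False}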
--- runnable-0.36.1/runnable/parameters.py
+++ /dev/null
@@ -1,144 +0,0 @@
-import inspect
-import json
-import logging
-import os
-from typing import Any, Dict, Type
-
-import pydantic
-from pydantic import BaseModel, ConfigDict
-from typing_extensions import Callable
-
-from runnable import defaults
-from runnable.datastore import JsonParameter, ObjectParameter
-from runnable.defaults import MapVariableType
-from runnable.utils import remove_prefix
-
-logger = logging.getLogger(defaults.LOGGER_NAME)
-
-
-def get_user_set_parameters(remove: bool = False) -> Dict[str, JsonParameter]:
-    """
-    Scans the environment variables for any user returned parameters that have a prefix runnable_PRM_.
-
-    This function does not deal with any type conversion of the parameters.
-    It just deserializes the parameters and returns them as a dictionary.
-
-    Args:
-        remove (bool, optional): Flag to remove the parameter if needed. Defaults to False.
-
-    Returns:
-        dict: The dictionary of found user returned parameters
-    """
-    parameters: Dict[str, JsonParameter] = {}
-    for env_var, value in os.environ.items():
-        if env_var.startswith(defaults.PARAMETER_PREFIX):
-            key = remove_prefix(env_var, defaults.PARAMETER_PREFIX)
-            try:
-                parameters[key.lower()] = JsonParameter(
-                    kind="json", value=json.loads(value)
-                )
-            except json.decoder.JSONDecodeError:
-                logger.warning(
-                    f"Parameter {key} could not be JSON decoded, adding the literal value"
-                )
-                parameters[key.lower()] = JsonParameter(kind="json", value=value)
-
-            if remove:
-                del os.environ[env_var]
-    return parameters
-
-
-def filter_arguments_for_func(
-    func: Callable[..., Any],
-    params: Dict[str, Any],
-    map_variable: MapVariableType = None,
-) -> Dict[str, Any]:
-    """
-    Inspects the function to be called as part of the pipeline to find the arguments of the function.
-    Matches the function arguments to the parameters available either by command line or by up stream steps.
-
-
-    Args:
-        func (Callable): The function to inspect
-        parameters (dict): The parameters available for the run
-
-    Returns:
-        dict: The parameters matching the function signature
-    """
-    function_args = inspect.signature(func).parameters
-
-    # Update parameters with the map variables
-    for key, v in (map_variable or {}).items():
-        params[key] = JsonParameter(kind="json", value=v)
-
-    bound_args = {}
-    unassigned_params = set(params.keys())
-    # Check if VAR_KEYWORD is used, it is we send back everything
-    # If **kwargs is present in the function signature, we send back everything
-    for name, value in function_args.items():
-        if value.kind != inspect.Parameter.VAR_KEYWORD:
-            continue
-        # Found VAR_KEYWORD, we send back everything as found
-        for key, value in params.items():
-            bound_args[key] = params[key].get_value()
-
-        return bound_args
-
-    # Lets return what is asked for then!!
-    for name, value in function_args.items():
-        # Ignore any *args
-        if value.kind == inspect.Parameter.VAR_POSITIONAL:
-            logger.warning(f"Ignoring parameter {name} as it is VAR_POSITIONAL")
-            continue
-
-        if name not in params:
-            # No parameter of this name was provided
-            if value.default == inspect.Parameter.empty:
-                # No default value is given in the function signature. error as parameter is required.
-                raise ValueError(
-                    f"Parameter {name} is required for {func.__name__} but not provided"
-                )
-            # default value is given in the function signature, nothing further to do.
-            continue
-
-        param_value = params[name]
-
-        if type(value.annotation) in [
-            BaseModel,
-            pydantic._internal._model_construction.ModelMetaclass,
-        ] and not isinstance(param_value, ObjectParameter):
-            # Even if the annotation is a pydantic model, it can be passed as an object parameter
-            # We try to cast it as a pydantic model if asked
-            named_param = params[name].get_value()
-
-            if not isinstance(named_param, dict):
-                # A case where the parameter is a one attribute model
-                named_param = {name: named_param}
-
-            bound_model = bind_args_for_pydantic_model(named_param, value.annotation)
-            bound_args[name] = bound_model
-
-        elif value.annotation in [str, int, float, bool]:
-            # Cast it if its a primitive type. Ensure the type matches the annotation.
-            bound_args[name] = value.annotation(params[name].get_value())
-        else:
-            bound_args[name] = params[name].get_value()
-
-        unassigned_params.remove(name)
-
-    params = {
-        key: params[key] for key in unassigned_params
-    }  # remove keys from params if they are assigned
-
-    return bound_args
-
-
-def bind_args_for_pydantic_model(
-    params: Dict[str, Any], model: Type[BaseModel]
-) -> BaseModel:
-    class EasyModel(model):  # type: ignore
-        model_config = ConfigDict(extra="ignore")
-
-    swallow_all = EasyModel(**params)
-    bound_model = model(**swallow_all.model_dump())
-    return bound_model