cloe-nessy 0.3.18__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. cloe_nessy/clients/api_client/__init__.py +10 -1
  2. cloe_nessy/clients/api_client/api_client.py +19 -8
  3. cloe_nessy/clients/api_client/api_response.py +7 -4
  4. cloe_nessy/clients/api_client/pagination_config.py +84 -0
  5. cloe_nessy/clients/api_client/pagination_strategy.py +500 -0
  6. cloe_nessy/file_utilities/get_file_paths.py +2 -1
  7. cloe_nessy/file_utilities/strategies/base_strategy.py +2 -1
  8. cloe_nessy/file_utilities/strategies/local_strategy.py +2 -1
  9. cloe_nessy/file_utilities/strategies/onelake_strategy.py +3 -1
  10. cloe_nessy/file_utilities/strategies/utils_strategy.py +3 -1
  11. cloe_nessy/integration/reader/__init__.py +2 -2
  12. cloe_nessy/integration/reader/api_reader.py +463 -72
  13. cloe_nessy/integration/reader/catalog_reader.py +6 -4
  14. cloe_nessy/integration/reader/excel_reader.py +3 -3
  15. cloe_nessy/integration/reader/file_reader.py +3 -1
  16. cloe_nessy/integration/reader/reader.py +1 -1
  17. cloe_nessy/integration/writer/catalog_writer.py +1 -1
  18. cloe_nessy/integration/writer/delta_writer/delta_append_writer.py +1 -1
  19. cloe_nessy/integration/writer/delta_writer/delta_writer_base.py +1 -1
  20. cloe_nessy/pipeline/__init__.py +9 -1
  21. cloe_nessy/pipeline/actions/__init__.py +3 -1
  22. cloe_nessy/pipeline/actions/read_api.py +272 -75
  23. cloe_nessy/pipeline/actions/read_catalog_table.py +0 -2
  24. cloe_nessy/pipeline/actions/read_excel.py +1 -1
  25. cloe_nessy/pipeline/actions/transform_decode.py +2 -1
  26. cloe_nessy/pipeline/actions/transform_hash_columns.py +4 -2
  27. cloe_nessy/pipeline/actions/transform_with_column.py +104 -0
  28. cloe_nessy/pipeline/actions/write_delta_append.py +0 -3
  29. cloe_nessy/pipeline/actions/write_delta_merge.py +0 -3
  30. cloe_nessy/pipeline/pipeline_builder.py +210 -0
  31. cloe_nessy/pipeline/pipeline_config.py +2 -0
  32. cloe_nessy/pipeline/pipeline_context.py +1 -1
  33. cloe_nessy/pipeline/pipeline_parsing_service.py +104 -39
  34. cloe_nessy/pipeline/pipeline_step.py +2 -0
  35. cloe_nessy/session/__init__.py +2 -1
  36. cloe_nessy/session/pyspark_compat.py +15 -0
  37. cloe_nessy/session/session_manager.py +1 -1
  38. {cloe_nessy-0.3.18.dist-info → cloe_nessy-1.0.0.dist-info}/METADATA +4 -4
  39. {cloe_nessy-0.3.18.dist-info → cloe_nessy-1.0.0.dist-info}/RECORD +40 -35
  40. {cloe_nessy-0.3.18.dist-info → cloe_nessy-1.0.0.dist-info}/WHEEL +1 -1
@@ -0,0 +1,104 @@
+"""Transform action to add or update a column using a SQL expression."""
+
+from typing import Any
+
+from pyspark.sql import functions as F
+
+from cloe_nessy.pipeline.pipeline_action import PipelineAction
+from cloe_nessy.pipeline.pipeline_context import PipelineContext
+
+
+class TransformWithColumnAction(PipelineAction):
+    """Add or update a column in the DataFrame using a SQL expression.
+
+    This action uses PySpark's expr() function to evaluate SQL expressions and
+    create or update columns in the DataFrame.
+
+    Examples:
+        === "Create new column"
+            ```yaml
+            Create Full Name:
+                action: TRANSFORM_WITH_COLUMN
+                options:
+                    column_name: full_name
+                    expression: concat(first_name, ' ', last_name)
+            ```
+
+        === "Update existing column"
+            ```yaml
+            Lowercase Email:
+                action: TRANSFORM_WITH_COLUMN
+                options:
+                    column_name: email
+                    expression: lower(email)
+            ```
+
+        === "Calculated column"
+            ```yaml
+            Calculate Total:
+                action: TRANSFORM_WITH_COLUMN
+                options:
+                    column_name: total_price
+                    expression: price * quantity * (1 + tax_rate)
+            ```
+
+        === "Extract date parts"
+            ```yaml
+            Extract Year:
+                action: TRANSFORM_WITH_COLUMN
+                options:
+                    column_name: year
+                    expression: year(order_date)
+            ```
+    """
+
+    name: str = "TRANSFORM_WITH_COLUMN"
+
+    def run(
+        self,
+        context: PipelineContext,
+        *,
+        column_name: str = "",
+        expression: str = "",
+        **_: Any,
+    ) -> PipelineContext:
+        """Add or update a column using a SQL expression.
+
+        Args:
+            context: The pipeline context containing the DataFrame
+            column_name: Name of the column to create or update
+            expression: SQL expression to evaluate for the column value
+            **_: Additional unused keyword arguments
+
+        Returns:
+            PipelineContext: Updated context with the modified DataFrame
+
+        Raises:
+            ValueError: If column_name is not provided
+            ValueError: If expression is not provided
+            ValueError: If context.data is None
+            Exception: If the SQL expression is invalid
+        """
+        if not column_name:
+            raise ValueError("No column_name provided.")
+
+        if not expression:
+            raise ValueError("No expression provided.")
+
+        if context.data is None:
+            raise ValueError("Data from context is required for transform_with_column")
+
+        self._console_logger.info(f"Adding/updating column '{column_name}' with expression: {expression}")
+
+        df = context.data
+
+        try:
+            # Use F.expr() to evaluate the SQL expression
+            df = df.withColumn(column_name, F.expr(expression))
+        except Exception as e:
+            self._console_logger.error(f"Failed to evaluate expression '{expression}' for column '{column_name}': {e}")
+            raise
+
+        self._console_logger.info(f"Successfully added/updated column '{column_name}'")
+
+        return context.from_existing(data=df)
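For orientation, the action body reduces to a single `withColumn` call with `F.expr`. A minimal standalone sketch of the same transformation (the sample data is illustrative):

```python
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
df = spark.createDataFrame([(10.0, 3, 0.19)], ["price", "quantity", "tax_rate"])

# Equivalent of TRANSFORM_WITH_COLUMN with
# expression: price * quantity * (1 + tax_rate)
df = df.withColumn("total_price", F.expr("price * quantity * (1 + tax_rate)"))
df.show()
```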
@@ -19,9 +19,6 @@ class WriteDeltaAppendAction(PipelineAction):
             table_identifier: my_catalog.my_schema.my_table
             ignore_empty_df: false
             ```
-
-    Returns:
-        None.
     """

     name: str = "WRITE_DELTA_APPEND"
@@ -28,9 +28,6 @@ class WriteDeltaMergeAction(PipelineAction):
             when_not_matched_insert: true
             use_partition_pruning: true
             ```
-
-    Returns:
-        None.
     """

     name: str = "WRITE_DELTA_MERGE"
@@ -0,0 +1,210 @@
+from collections import OrderedDict
+from collections.abc import Callable
+from typing import Any, Self
+
+from .pipeline import Pipeline
+from .pipeline_step import PipelineStep
+
+
+class PipelineBuilder:
+    """Fluent API builder for creating Nessy pipelines programmatically.
+
+    This class provides a chainable interface for building pipelines using method calls
+    instead of YAML configuration. It dynamically creates methods for all available
+    PipelineActions.
+
+    Example:
+        ```python
+        pipeline = (PipelineBuilder("My Pipeline")
+            .read_files(location="data/*.csv", extension="csv")
+            .transform_clean_column_names()
+            .transform_filter(condition="amount > 1000")
+            .write_catalog_table(catalog="prod", schema="sales", table="results")
+            .build())
+
+        pipeline.run()
+        ```
+    """
+
+    def __init__(self, name: str) -> None:
+        """Initialize the pipeline builder.
+
+        Args:
+            name: The name of the pipeline.
+        """
+        self.name = name
+        self.steps: OrderedDict[str, PipelineStep] = OrderedDict()
+        self._step_counter = 0
+
+    def __getattr__(self, name: str) -> Callable[..., "PipelineBuilder"]:
+        """Dynamically create methods for pipeline actions.
+
+        This method is called when an attribute that doesn't exist is accessed.
+        It converts method calls like `read_files()` into the corresponding PipelineAction.
+
+        Args:
+            name: The method name being called.
+
+        Returns:
+            A callable that adds the corresponding pipeline step.
+
+        Raises:
+            AttributeError: If the method name doesn't correspond to a known action.
+        """
+        # Lazy import to avoid circular import issues
+        from .actions import pipeline_actions
+
+        # Convert method name to action name (e.g., read_files -> READ_FILES)
+        action_name = name.upper()
+
+        if action_name in pipeline_actions:
+            action_class = pipeline_actions[action_name]
+
+            def method(**kwargs: Any) -> "PipelineBuilder":
+                return self._add_step(action_class, **kwargs)
+
+            return method
+
+        raise AttributeError(
+            f"PipelineBuilder has no method '{name}'. Available actions: {list(pipeline_actions.keys())}"
+        )
+
+    def _add_step(self, action_class: type, step_name: str | None = None, **options: Any) -> Self:
+        """Add a step to the pipeline.
+
+        Args:
+            action_class: The PipelineAction class to instantiate.
+            step_name: Optional custom name for the step.
+            **options: Options to pass to the action.
+
+        Returns:
+            Self for method chaining.
+
+        Raises:
+            ValueError: If a step with the given name already exists.
+        """
+        if step_name is None:
+            step_name = f"step_{self._step_counter:03d}_{action_class.__name__}"
+
+        # Validate that step name is unique
+        if step_name in self.steps:
+            raise ValueError(
+                f"A step with name '{step_name}' already exists in the pipeline. "
+                f"Please provide a unique step_name. "
+                f"Existing steps: {list(self.steps.keys())}"
+            )
+
+        # Convert any PipelineBuilder instances in options to PipelineStep references
+        options = self._convert_builder_references(options)
+
+        # Set up context reference to previous step
+        context_ref = None
+        if self.steps:
+            context_ref = list(self.steps.keys())[-1]
+
+        step = PipelineStep(name=step_name, action=action_class(), options=options, _context_ref=context_ref)
+
+        # Remove any predecessors that are from already-executed external pipelines
+        # (these steps have results but aren't in our pipeline)
+        external_predecessors = set()
+        for pred_name in step._predecessors:
+            if pred_name not in self.steps and pred_name != context_ref:
+                # Check if this is a reference to an executed step from options
+                for opt_val in options.values():
+                    if isinstance(opt_val, PipelineStep) and opt_val.name == pred_name:
+                        # This is an external executed step, remove from predecessors
+                        external_predecessors.add(pred_name)
+                        break
+
+        step._predecessors -= external_predecessors
+
+        self.steps[step_name] = step
+        self._step_counter += 1
+        return self
+
+    def _convert_builder_references(self, options: dict[str, Any]) -> dict[str, Any]:
+        """Convert any PipelineBuilder instances in options to PipelineStep references.
+
+        This method recursively processes options to find PipelineBuilder instances and
+        converts them to their last step's PipelineStep reference. This allows users to
+        pass PipelineBuilder instances directly to actions that expect PipelineStep references.
+
+        Handles PipelineBuilder instances in:
+        - Direct values
+        - Lists
+        - Nested dictionaries
+
+        Args:
+            options: Dictionary of options that may contain PipelineBuilder instances.
+
+        Returns:
+            Dictionary with PipelineBuilder instances converted to PipelineStep references.
+
+        Raises:
+            ValueError: If a PipelineBuilder has no steps.
+        """
+        converted = {}
+        for key, value in options.items():
+            converted[key] = self._convert_value(value, key)
+        return converted
+
+    def _convert_value(self, value: Any, context: str = "") -> Any:
+        """Recursively convert a value, handling PipelineBuilder instances.
+
+        When a PipelineBuilder is passed as a value, it is executed immediately
+        and its last step is returned as the reference. This allows the pipeline
+        to be run before the main pipeline that references it.
+
+        Args:
+            value: The value to convert.
+            context: Context string for error messages (e.g., key name).
+
+        Returns:
+            The converted value.
+        """
+        if isinstance(value, PipelineBuilder):
+            # Build and run the referenced pipeline immediately
+            pipeline = value.build()
+            if not pipeline.steps:
+                context_msg = f" in '{context}'" if context else ""
+                raise ValueError(f"PipelineBuilder{context_msg} must have at least one step")
+
+            # Run the pipeline to populate the results
+            pipeline.run()
+
+            # Get the last step which now has results
+            last_step_name = list(pipeline.steps.keys())[-1]
+            last_step = pipeline.steps[last_step_name]
+
+            # Clear predecessors since this step is already executed and has its result
+            # This prevents the main pipeline from trying to resolve dependencies
+            # that don't exist in its own step dictionary
+            last_step._predecessors = set()
+            last_step._context_ref = None
+
+            return last_step
+        if isinstance(value, dict):
+            # Recursively convert nested dictionaries
+            return {k: self._convert_value(v, f"{context}.{k}" if context else k) for k, v in value.items()}
+        if isinstance(value, list):
+            # Recursively convert lists
+            return [
+                self._convert_value(item, f"{context}[{i}]" if context else f"[{i}]") for i, item in enumerate(value)
+            ]
+        return value
+
+    def build(self) -> Pipeline:
+        """Build the pipeline from the configured steps.
+
+        Returns:
+            A Pipeline object ready for execution.
+        """
+        return Pipeline(name=self.name, steps=self.steps)
+
+    def run(self) -> None:
+        """Build and run the pipeline immediately.
+
+        This is a convenience method equivalent to calling build().run().
+        """
+        pipeline = self.build()
+        pipeline.run()
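Because `_convert_builder_references` builds and runs any `PipelineBuilder` passed as an option, a sub-pipeline can be supplied inline where an action expects a step reference. A hedged sketch under assumptions: `PipelineBuilder` is assumed to be exported from `cloe_nessy.pipeline`, and the `transform_join` action with its `right`/`on` options is hypothetical, named here only to illustrate the mechanism:

```python
from cloe_nessy.pipeline import PipelineBuilder  # assumed export

# Per _convert_value, the inner builder is built and run immediately, and
# the option value is replaced by its already-executed last PipelineStep.
lookup = PipelineBuilder("Lookup").read_files(location="ref/*.csv", extension="csv")

main = (
    PipelineBuilder("Main")
    .read_files(location="data/*.csv", extension="csv")
    .transform_join(right=lookup, on="country_code")  # hypothetical action
    .build()
)
main.run()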
@@ -83,6 +83,7 @@ class PipelineStepConfig(PipelineConfigBaseModel):
     context: str | None = None
     table_metadata: str | None = None
     options: dict = Field(default_factory=dict)
+    env: dict = Field(default_factory=dict)


 class PipelineConfig(PipelineConfigBaseModel):
@@ -90,3 +91,4 @@ class PipelineConfig(PipelineConfigBaseModel):

     name: str
     steps: OrderedDict[str, PipelineStepConfig]
+    env: dict[str, str] = Field(default_factory=dict)
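The two new `env` fields give `{{env:...}}` resolution three scopes. Per `_resolve_env_vars` further down in this diff, the effective environment is built as `{**os_env, **pipeline_env, **step.env}`, so step-level values shadow pipeline-level ones, which shadow the OS environment. A minimal sketch of that precedence (variable names are illustrative):

```python
# Later dicts win on key collisions, mirroring _resolve_env_vars.
os_env = {"STAGE": "dev", "REGION": "westeurope"}  # os.environ
pipeline_env = {"STAGE": "qa"}                     # pipeline-level `env:` block
step_env = {"STAGE": "prod"}                       # step-level `env:` block

effective_env = {**os_env, **pipeline_env, **step_env}
assert effective_env == {"STAGE": "prod", "REGION": "westeurope"}
```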
@@ -1,6 +1,6 @@
 from typing import Any

-from pyspark.sql import DataFrame
+from cloe_nessy.session import DataFrame

 from ..models import Table

@@ -3,6 +3,7 @@ import re
 from collections import OrderedDict
 from enum import Enum
 from pathlib import Path
+from typing import Any

 import yaml

@@ -10,7 +11,7 @@ from ..logging import LoggerMixin
 from ..session import SessionManager
 from .actions import PipelineActionType, pipeline_actions
 from .pipeline import Pipeline
-from .pipeline_config import PipelineConfig
+from .pipeline_config import PipelineConfig, PipelineStepConfig
 from .pipeline_step import PipelineStep


@@ -63,49 +64,22 @@ class PipelineParsingService:
         if not yaml_str:
             raise ValueError("YAML content is empty.")

-        final_yaml_str = PipelineParsingService._replace_variables(yaml_str)
-        config = yaml.safe_load(final_yaml_str)
+        secrets_repl_yaml_str = PipelineParsingService._replace_secret_refs(yaml_str)
+        fixed_yaml_str = PipelineParsingService._fix_yaml_str_with_templates(secrets_repl_yaml_str)
+        config = yaml.safe_load(fixed_yaml_str)
         pipeline_config = PipelineConfig.metadata_to_instance(config)
-        steps = PipelineParsingService._get_steps(pipeline_config.steps)
+        steps = PipelineParsingService._get_steps(pipeline_config.steps, pipeline_config.env)
         pipeline = Pipeline(name=pipeline_config.name, steps=steps)  # type: ignore
         console_logger.info("Pipeline [ '%s' ] parsed successfully with %d steps.", pipeline.name, len(pipeline.steps))
         return pipeline

     @staticmethod
-    def _replace_variables(yaml_str: str) -> str:
-        """Replace variable placeholders in a YAML string.
-
-        Replaces environment variables with the pattern `{{env:var-name}}`. Where
-        the var-name is the name of the environment variable. Replaces secret
-        references with the pattern `{{secret-scope-name:secret-key}}`. Where
-        scope-name is the name of the secret scope and secret-key is the key of
-        the secret.
-
-        Args:
-            yaml_str: A string that can be parsed in YAML format.
-
-        Returns:
-            The same YAML string with environment variable placeholders replaced.
-        """
-        env_var_pattern = r"\{\{env:([^}]+)\}\}"
-        secret_ref_pattern = r"\{\{(?!step|env)([^}]+):([^}]+)\}\}"
-
-        def replace_with_env_var(match):
-            env_var_name = match.group(1)
-            env_var_value = os.getenv(env_var_name)
-            return env_var_value
-
-        def replace_with_secret(match):
-            secret_scope_name = match.group(1)
-            secret_key = match.group(2)
-            return SessionManager.get_utils().secrets.get(scope=secret_scope_name, key=secret_key)
-
-        env_replaced_yaml_string = re.sub(env_var_pattern, replace_with_env_var, yaml_str)
-        final_yaml_string = re.sub(secret_ref_pattern, replace_with_secret, env_replaced_yaml_string)
-        return final_yaml_string
-
-    @staticmethod
-    def _get_steps(step_configs, last_step_name: str | None = None):
+    def _get_steps(
+        step_configs: OrderedDict[str, PipelineStepConfig],
+        pipeline_env: dict[str, str],
+        last_step_name: str | None = None,
+    ) -> OrderedDict[str, PipelineStep]:
+        os_env = dict(os.environ)
         steps = OrderedDict()
         for step_name, step_config in step_configs.items():
             is_successor = step_config.is_successor
@@ -115,19 +89,99 @@ class PipelineParsingService:
             action = PipelineActionType[step_config.action.name].value()
             step = PipelineStep(
                 name=step_name,
+                env=step_config.env,
                 action=action,
                 options=step_config.options,
                 _context_ref=context_ref,
                 _table_metadata_ref=step_config.table_metadata,
             )
-            steps[step.name] = step
+            steps[step.name] = PipelineParsingService._resolve_env_vars(step, os_env, pipeline_env)
             last_step_name = step_name
         for step in steps.values():
             steps[step.name] = PipelineParsingService._replace_step_refs(steps, step)
         return steps

+    @staticmethod
+    def _replace_secret_refs(yaml_str: str) -> str:
+        """Replaces secret reference placeholders in a YAML string.
+
+        Replaces secret references with the pattern `{{secret-scope-name:secret-key}}`,
+        where scope-name is the name of the secret scope and secret-key is the key of the secret.
+
+        Args:
+            yaml_str: A string that can be parsed in YAML format.
+
+        Returns:
+            The same YAML string with secret reference placeholders replaced.
+        """
+        secret_ref_pattern = r"\{\{(?!(?:env|step):)([^}]+):([^}]+)\}\}"
+
+        def replace_with_secret(match):
+            secret_scope_name = match.group(1)
+            secret_key = match.group(2)
+            return SessionManager.get_utils().secrets.get(scope=secret_scope_name, key=secret_key)
+
+        return re.sub(secret_ref_pattern, replace_with_secret, yaml_str)
+
+    @staticmethod
+    def _resolve_env_vars(step: PipelineStep, os_env: dict[str, str], pipeline_env: dict[str, str]) -> PipelineStep:
+        """Resolves environment variable placeholders in a step definition.
+
+        Resolves environment variables with the pattern `{{env:var-name}}`,
+        where `var-name` is the name of the environment variable.
+
+        Args:
+            step: Step definition in which replacement occurs.
+            os_env: OS-scope environment variables.
+            pipeline_env: Pipeline-scope environment variables.
+
+        Returns:
+            The same step definition with environment variable placeholders replaced.
+
+        Raises:
+            KeyError: If the specified key is not found in the environment variables.
+        """
+        env_var_pattern = re.compile(r"\{\{env:([A-Z_][A-Z0-9_]*)\}\}")
+
+        def _resolve_object(obj: Any) -> Any:
+            if isinstance(obj, str):
+                return _resolve_string(obj)
+            if isinstance(obj, list):
+                return [_resolve_object(i) for i in obj]
+            if isinstance(obj, dict):
+                return {k: _resolve_object(v) for k, v in obj.items()}
+            return obj
+
+        def _resolve_string(value: str) -> str:
+            def repl(match):
+                key = match.group(1)
+                if key not in effective_env:
+                    raise KeyError(f"Environment variable '{key}' is not defined")
+                return str(effective_env[key])
+
+            return env_var_pattern.sub(repl, value)
+
+        if step.options:
+            effective_env = {**os_env, **pipeline_env, **step.env}
+            for option, value in step.options.items():
+                step.options[option] = _resolve_object(value)
+
+        return step
+
     @staticmethod
     def _replace_step_refs(steps: OrderedDict[str, PipelineStep], step: PipelineStep) -> PipelineStep:
+        """Replaces step reference placeholders in a step definition.
+
+        Replaces references to other steps with the pattern `((step:step-name))`,
+        where `step-name` is the name of the referenced step.
+
+        Args:
+            steps: All pipeline step definitions.
+            step: Step definition in which replacement occurs.
+
+        Returns:
+            The same step definition with referenced step names replaced.
+        """
         step_ref_pattern = r"\(\(step:([^)]+)\)\)"

         def _handle_string_value(value: str, option: str):
@@ -154,3 +208,14 @@ class PipelineParsingService:
                 _handle_list_value(value, option)

         return step
+
+    @staticmethod
+    def _fix_yaml_str_with_templates(yaml_str: str) -> str:
+        """Fixes unquoted {{env:...}} templates before yaml.safe_load."""
+        unquoted_template = re.compile(r"(:)\s*(\{\{env:[^}]+\}\})(?=\s*$|\s+#)", re.MULTILINE)
+
+        def replacer(match):
+            colon, template = match.groups()
+            return f'{colon} "{template}"'
+
+        return unquoted_template.sub(replacer, yaml_str)
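The quoting pass exists because a bare `{{env:...}}` scalar is flow-mapping syntax to YAML, so `yaml.safe_load` fails on it (a mapping nested as a key is unhashable) before substitution can happen. A self-contained sketch using the same pattern as `_fix_yaml_str_with_templates` (the sample YAML is illustrative):

```python
import re

unquoted_template = re.compile(r"(:)\s*(\{\{env:[^}]+\}\})(?=\s*$|\s+#)", re.MULTILINE)

raw = "options:\n  location: {{env:DATA_ROOT}}\n"
# Wrap the template in quotes so yaml.safe_load reads it as a plain string
# instead of a nested flow mapping.
fixed = unquoted_template.sub(lambda m: f'{m.group(1)} "{m.group(2)}"', raw)
print(fixed)
# options:
#   location: "{{env:DATA_ROOT}}"
```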
@@ -15,6 +15,7 @@ class PipelineStep:
     Attributes:
         name: The name of the step.
        action: The action to be executed.
+        env: The step's environment variables.
         is_successor: A boolean indicating if the step is a successor and takes
             the previous step's context.
         context: The context of the step.
@@ -26,6 +27,7 @@ class PipelineStep:

     name: str
     action: PipelineAction
+    env: dict[str, str] = field(default_factory=lambda: {})
     context: PipelineContext = field(default_factory=lambda: PipelineContext())
     options: dict[str, Any] = field(default_factory=lambda: {})
     result: PipelineContext = field(default_factory=lambda: PipelineContext())
@@ -1,3 +1,4 @@
+from .pyspark_compat import DataFrame, SparkSession
 from .session_manager import SessionManager

-__all__ = ["SessionManager"]
+__all__ = ["SessionManager", "DataFrame", "SparkSession"]
@@ -0,0 +1,15 @@
+from typing import TYPE_CHECKING
+
+from pyspark.sql.utils import is_remote
+
+if TYPE_CHECKING:
+    from pyspark.sql import Column, DataFrame, SparkSession
+else:
+    # Real runtime imports
+    if is_remote():
+        from pyspark.sql.connect.dataframe import DataFrame
+        from pyspark.sql.connect.session import SparkSession
+    else:
+        from pyspark.sql import DataFrame, SparkSession
+
+__all__ = ["SparkSession", "DataFrame", "Column"]
@@ -3,7 +3,7 @@ import os
 from enum import Enum
 from typing import Any

-from pyspark.sql import SparkSession
+from cloe_nessy.session import SparkSession

 from ..logging import LoggerMixin

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cloe-nessy
-Version: 0.3.18
+Version: 1.0.0
 Summary: Your friendly datalake monster.
 Project-URL: homepage, https://initions.com/
 Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
@@ -16,7 +16,7 @@ Requires-Python: <3.13,>=3.11
 Requires-Dist: azure-identity<2.0.0,>=1.19.0
 Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
 Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
-Requires-Dist: fsspec<2025.7.1,>=2025.7.0
+Requires-Dist: fsspec<2025.12.1,>=2025.12.0
 Requires-Dist: httpx<1.0.0,>=0.27.2
 Requires-Dist: jinja2<4.0.0,>=3.1.4
 Requires-Dist: matplotlib<4.0.0,>=3.9.2
@@ -58,12 +58,12 @@ Extract-Transform-Load (ETL) Workflow.

 When you are contributing, please refer to our Contribution Guide in the *nessy*
 Docs
-[here](https://yellow-mud-0b9177e03.2.azurestaticapps.net/tool_docs/nessy/Developer-Guide/)!
+[here](https://mango-tree-0b8dd3b03.1.azurestaticapps.net/tool_docs/nessy/Developer-Guide/)!

 ## Usage

 Please find the User Guide
-[here](https://yellow-mud-0b9177e03.2.azurestaticapps.net/tool_docs/nessy/User-Guide/)!
+[here](https://mango-tree-0b8dd3b03.1.azurestaticapps.net/tool_docs/nessy/User-Guide/)!

 ## Contact