cloe-nessy 0.3.17.0__py3-none-any.whl → 0.3.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -102,7 +102,7 @@ class DeltaLoader(ABC, LoggerMixin):
  ),
  )
  catalog_writer = CatalogWriter()
- catalog_writer.write_table(
+ catalog_writer.write(
  df=metadata_df,
  table_identifier=self.metadata_table_identifier,
  mode="append",
@@ -71,9 +71,46 @@ class CatalogReader(BaseReader):
  except AnalysisException as err:
  raise ValueError(f"Table not found: {table_identifier}") from err
  except Exception as err:
- if delta_load_options:
- raise ReadOperationFailedError(f"Delta load failed for table '{table_identifier}': {err}") from err
- else:
- raise ReadOperationFailedError(
- f"An error occurred while reading the table '{table_identifier}': {err}"
- ) from err
+ raise ReadOperationFailedError(
+ f"An error occurred while reading the table '{table_identifier}': {err}"
+ ) from err
+
+ def read_stream(
+ self,
+ table_identifier: str = "",
+ *,
+ options: dict[str, str] | None = None,
+ **kwargs: Any,
+ ) -> DataFrame:
+ """Reads a streaming table from the Unity Catalog.
+
+ Args:
+ table_identifier: The table identifier in the Unity Catalog in the format 'catalog.schema.table'.
+ options: PySpark options for the read stream operation.
+ **kwargs: Additional keyword arguments to maintain compatibility with the base class method.
+
+ Returns:
+ The Spark Streaming DataFrame containing the read data.
+
+ Raises:
+ ValueError: If the table_identifier is not provided, is not a string, or is not in the correct format.
+ Exception: For any other unexpected errors during streaming read operation.
+ """
+ if options is None:
+ options = {}
+ if not table_identifier:
+ raise ValueError("table_identifier is required")
+ if not isinstance(table_identifier, str):
+ raise ValueError("table_identifier must be a string")
+ if len(table_identifier.split(".")) != 3:
+ raise ValueError("table_identifier must be in the format 'catalog.schema.table'")
+
+ try:
+ df = self._spark.readStream.table(table_identifier, **options)
+ return df
+ except AnalysisException as err:
+ raise ValueError(f"Table not found or not streamable: {table_identifier}") from err
+ except Exception as err:
+ raise ReadOperationFailedError(
+ f"An error occurred while reading the stream from table '{table_identifier}': {err}"
+ ) from err
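
The new `read_stream` sits alongside the batch `read` on `CatalogReader`. A minimal usage sketch, assuming an active Spark session and the import path implied by the wheel's RECORD (`cloe_nessy/integration/reader/catalog_reader.py`); the table identifier is illustrative, not from the package docs:

```python
from cloe_nessy.integration.reader import CatalogReader

reader = CatalogReader()
stream_df = reader.read_stream(
    table_identifier="my_catalog.business_schema.sales_table",  # must be catalog.schema.table
    options={},  # forwarded to the underlying spark.readStream call
)
# stream_df is an unbounded streaming DataFrame; pair it with the
# CatalogWriter.write_stream method added in the same release to persist it.
```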
@@ -5,7 +5,7 @@ class CatalogWriter:
  """A writer for Catalog tables."""

  @staticmethod
- def write_table(
+ def write(
  df: DataFrame | None,
  table_identifier: str | None,
  partition_by: str | list[str] | None = None,
@@ -46,3 +46,65 @@ class CatalogWriter:
  if options is None:
  options = {}
  df.write.saveAsTable(table_identifier, mode=mode, partitionBy=partition_by, **options)
+
+ @staticmethod
+ def write_stream(
+ df: DataFrame | None,
+ table_identifier: str | None,
+ checkpoint_location: str | None = None,
+ trigger_dict: dict | None = None,
+ options: dict[str, str] | None = None,
+ mode: str = "append",
+ await_termination: bool = False,
+ ) -> None:
+ """Write a streaming DataFrame to a Unity Catalog table.
+
+ Args:
+ df: The streaming DataFrame to write.
+ table_identifier: The table identifier in the Unity Catalog in the
+ format 'catalog.schema.table'.
+ checkpoint_location: Location for checkpointing. Required for stream recovery.
+ trigger_dict: A dictionary specifying the trigger configuration for the streaming query.
+ Supported keys include:
+ - "processingTime": Specifies a time interval (e.g., "10 seconds") for micro-batch processing.
+ - "once": Processes all available data once and then stops.
+ - "continuous": Specifies a time interval (e.g., "1 second") for continuous processing.
+ - "availableNow": Processes all available data immediately and then stops.
+ If nothing is provided, the default is {"availableNow": True}.
+ options: PySpark options for the DataFrame streaming write operation.
+ mode: The write mode. For streaming, typically "append".
+ await_termination: If True, the function will wait for the streaming
+ query to finish before returning.
+
+ Raises:
+ ValueError: If the mode is not supported for streaming operations.
+ ValueError: If the table_identifier is not a string or not in the format 'catalog.schema.table'.
+ ValueError: If the DataFrame is None.
+ ValueError: If checkpoint_location is not provided.
+ """
+ if mode not in ("append", "complete", "update"):
+ raise ValueError("mode must be one of append, complete, update for streaming operations")
+ if not table_identifier:
+ raise ValueError("table_identifier is required")
+ elif not isinstance(table_identifier, str):
+ raise ValueError("table_identifier must be a string")
+ elif len(table_identifier.split(".")) != 3:
+ raise ValueError("table_identifier must be in the format 'catalog.schema.table'")
+ if not df:
+ raise ValueError("df is required, but was None.")
+ if not checkpoint_location:
+ raise ValueError("checkpoint_location is required for streaming operations")
+
+ if options is None:
+ options = {}
+ if trigger_dict is None:
+ trigger_dict = {"availableNow": True}
+
+ stream_writer = df.writeStream.format("delta").outputMode(mode)
+ stream_writer.options(**options).option("checkpointLocation", checkpoint_location)
+ stream_writer.trigger(**trigger_dict)
+
+ query = stream_writer.toTable(table_identifier)
+
+ if await_termination:
+ query.awaitTermination()
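
A hedged sketch of calling the new `write_stream` static method; the rate source stands in for a real streaming DataFrame (in a pipeline it would come from `CatalogReader.read_stream`), and the identifier and checkpoint path are placeholders:

```python
from pyspark.sql import SparkSession

from cloe_nessy.integration.writer import CatalogWriter

spark = SparkSession.builder.getOrCreate()
# Toy streaming source used only for illustration.
stream_df = spark.readStream.format("rate").option("rowsPerSecond", 5).load()

CatalogWriter.write_stream(
    df=stream_df,
    table_identifier="my_catalog.business_schema.sales_table",  # placeholder identifier
    checkpoint_location="/tmp/checkpoints/sales_table",  # placeholder path
    trigger_dict={"availableNow": True},  # also the default when omitted
    mode="append",
    await_termination=True,  # block until the availableNow batch finishes
)
```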
@@ -196,7 +196,11 @@ class DeltaMergeWriter(BaseDeltaWriter):

  config = DeltaMergeConfig(dataframe_columns=data_frame.columns, **kwargs)

- delta_table = self.table_manager.get_delta_table(location=storage_path, spark=data_frame.sparkSession)
+ delta_table = self.table_manager.get_delta_table(
+ table=table,
+ location=storage_path if not table else None,
+ spark=data_frame.sparkSession,
+ )

  match_conditions = self._build_match_conditions(data_frame, config)

@@ -5,6 +5,7 @@ from pydantic import BaseModel, Field, field_validator, model_validator

  COLUMN_DATA_TYPE_LIST = {
  "string",
+ "decimal",
  "integer",
  "int",
  "smallint",
@@ -31,7 +32,7 @@ class Column(BaseModel):
  nullable: bool = True
  default_value: Any = None
  generated: str | None = None
- properties: dict[str, Any] = Field(default_factory=dict)
+ business_properties: dict[str, Any] = Field(default_factory=dict)
  comment: str | None = None

  @field_validator("data_type", mode="before")
@@ -43,7 +44,7 @@ class Column(BaseModel):
  """
  val = raw.lower()
  base_data_types = re.findall(r"\b[a-z]+\b", val)
- forbidden_characters = re.findall(r"[^a-z\<\>)]+", val)
+ forbidden_characters = re.findall(r"[^a-z0-9\(\)\<\>, ]+", val)

  if forbidden_characters:
  raise ValueError(f"Forbidden characters in data type definition [ '{val}' ]: [' {forbidden_characters} ']")
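
The widened character class is what lets parameterized types such as `decimal(10,2)` pass validation. A quick standalone comparison of the two patterns:

```python
import re

val = "decimal(10,2)"
# Old pattern: digits, parentheses and commas were reported as forbidden.
print(re.findall(r"[^a-z\<\>)]+", val))          # ['(10,2']
# New pattern: 0-9, (), <>, comma and space are allowed, so nothing is flagged.
print(re.findall(r"[^a-z0-9\(\)\<\>, ]+", val))  # []
```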
@@ -43,6 +43,7 @@ class Schema(ReadInstancesMixin):
  raise FileNotFoundError("Schema file not found.")

  schema, schema_errors = super().read_instance_from_file(processed_instance_path)
+ table_errors: list[ValidationErrorType] = []
  if schema:
  schema.storage_path = "" if not schema.storage_path else schema.storage_path
  tables, table_errors = Table.read_instances_from_directory(
@@ -13,6 +13,14 @@ USING delta
  {% if table.storage_path %}
  LOCATION '{{ table.storage_path }}'
  {% endif %}
+ {% if table.properties %}
+ TBLPROPERTIES (
+ {%- for key, value in table.properties.items() %}
+ {%- if not loop.first %}, {% endif -%}
+ '{{key}}' = '{{value}}'
+ {%- endfor -%}
+ )
+ {% endif %}
  {% if table.partition_by -%}
  {%- if table.liquid_clustering -%} CLUSTER {%- else -%} PARTITIONED {%- endif %} BY (
  {%- for column in table.partition_by -%}
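
For reference, the comma handling of the added block can be checked with a trimmed-down inline copy of the loop (the real template ships as `cloe_nessy/models/templates/create_table.sql.j2`); the property keys below are purely illustrative:

```python
from jinja2 import Template

snippet = Template(
    "TBLPROPERTIES (\n"
    "{%- for key, value in properties.items() %}"
    "{%- if not loop.first %}, {% endif -%}"
    "'{{key}}' = '{{value}}'"
    "{%- endfor -%}\n"
    ")"
)
print(snippet.render(properties={"delta.enableChangeDataFeed": "true", "quality": "bronze"}))
# -> TBLPROPERTIES ('delta.enableChangeDataFeed' = 'true', 'quality' = 'bronze')
```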
@@ -34,3 +42,17 @@ ALTER TABLE {{ table.escaped_identifier }} ADD CONSTRAINT {{constraint.name}} CH
  {%- if table.comment %}
  COMMENT ON TABLE {{ table.escaped_identifier }} IS '{{ table.comment }}';
  {%- endif %}
+ {# Tags do not yet work in Databricks
+ {%- if table.business_properties %}
+ {%- for tag_key, tag_value in table.business_properties.items() %}
+ SET TAG ON TABLE {{ table.escaped_identifier }} `{{ tag_key }}`{% if tag_value %} = `{{ tag_value }}`{% endif %};
+ {%- endfor %}
+ {%- endif %}
+
+ {%- for column in table.columns %}
+ {%- if column.business_properties %}
+ {%- for tag_key, tag_value in column.business_properties.items() %}
+ SET TAG ON COLUMN {{ table.escaped_identifier }}.`{{ column.name }}` `{{ tag_key }}`{% if tag_value %} = `{{ tag_value }}`{% endif %};
+ {%- endfor %}
+ {%- endif %}
+ {%- endfor %} #}
@@ -110,7 +110,7 @@ class TableManager(LoggerMixin):
  self._spark.sql(f"USE CATALOG {table.catalog};")
  self._spark.sql(f"USE SCHEMA {table.schema};")
  for statement in table.get_create_statement(replace=replace).split(";"):
- if statement and statement != "\n":
+ if statement and statement.strip():
  self._spark.sql(statement)

  def drop_table(
@@ -186,6 +186,9 @@ class TableManager(LoggerMixin):
  def get_delta_table(self, table: Table | None = None, location: str | None = None, spark=None) -> DeltaTable:
  """Get the DeltaTable object from the Table objects location or a location string.

+ For managed tables, uses the table identifier to access the DeltaTable.
+ For external tables or when a location is provided, uses the storage path.
+
  Args:
  table: A Table object representing the Delta table.
  location: A string representing the table location.
@@ -195,17 +198,35 @@ class TableManager(LoggerMixin):
  The DeltaTable object corresponding to the given Table object or location string.

  Raises:
- ValueError: If neither table nor location is provided, or if both are provided.
+ ValueError: If neither table nor location is provided.
  """
- if (table is None and location is None) or (table is not None and location is not None):
+ if table is None and location is None:
  raise ValueError(
- f"Either table or location must be provided, but not both. Table: {table}, location: {location}",
+ f"Either table or location must be provided. Table: {table}, location: {location}",
+ )
+
+ spark_session = spark or self._spark
+
+ if table is not None and location is not None:
+ self._console_logger.info(
+ f"Both table ({table.identifier}) and location ({location}) provided. Using table object as priority."
  )

  if table is not None:
- location = str(table.storage_path)
+ if table.is_external is False:
+ self._console_logger.info(f"Getting DeltaTable object for managed table: {table.identifier}")
+ return DeltaTable.forName(spark_session, table.identifier)
+
+ table_location = str(table.storage_path)
+ self._console_logger.info(f"Getting DeltaTable object for external table location: {table_location}")
+ return DeltaTable.forPath(spark_session, table_location)
+
+ self._console_logger.info(f"No table object provided, using location: {location}")
+ if location is None:
+ self._console_logger.error("Location is None - this should not happen!")
+ raise ValueError("Location cannot be None when no table object is provided")
  self._console_logger.info(f"Getting DeltaTable object for location: {location}")
- return DeltaTable.forPath(spark or self._spark, str(location))
+ return DeltaTable.forPath(spark_session, str(location))

  def table_exists(self, table: Table | None = None, table_identifier: str | None = None) -> bool:
  """Checks if a table exists in the catalog.
@@ -235,9 +256,10 @@ class TableManager(LoggerMixin):
  raise ValueError("Invalid table identifier format. Expected 'catalog.schema.table'.")

  query_result = self._spark.sql(
+ # Using both upper and lower case to ensure compatibility with case changes in Databricks
  f"""
  SELECT 1 FROM {catalog}.information_schema.tables
- WHERE table_name = '{table_name}'
+ WHERE table_name in ('{table_name}', '{table_name.lower()}')
  AND table_schema = '{schema}'
  LIMIT 1""",
  )
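
Taken together, the `TableManager` hunks mean callers can pass either a `Table` object or a bare location to `get_delta_table`, and `table_exists` now also matches the lowercased table name. A hedged sketch of the location-based branch, assuming an active Spark session with Delta support (the path is illustrative):

```python
from cloe_nessy.object_manager import TableManager

manager = TableManager()
# Path-based access, e.g. for an external table location:
delta_table = manager.get_delta_table(
    location="abfss://lake@storage.dfs.core.windows.net/sales_data/sales_table"
)
# Passing a Table object instead resolves managed tables (is_external is False) via
# DeltaTable.forName(spark, table.identifier) and external ones via DeltaTable.forPath.
```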
@@ -23,13 +23,44 @@ class ReadCatalogTableAction(PipelineAction):
  options:
  table_identifier: my_catalog.business_schema.sales_table
  options: <options for the CatalogReader read method>
- delta_load_options:
- strategy: CDF
- delta_load_identifier: my_delta_load_id
- strategy_options:
- deduplication_columns: ["id"]
- enable_full_load: true
+ delta_load_options:
+ strategy: CDF
+ delta_load_identifier: my_delta_load_id
+ strategy_options:
+ deduplication_columns: ["id"]
+ enable_full_load: true
  ```
+ === "Batch Read"
+ ```yaml
+ Read Sales Table:
+ action: READ_CATALOG_TABLE
+ options:
+ table_identifier: my_catalog.business_schema.sales_table
+ options: <options for the CatalogReader read method>
+ ```
+ === "Streaming Read"
+ ```yaml
+ Read Sales Table Stream:
+ action: READ_CATALOG_TABLE
+ options:
+ table_identifier: my_catalog.business_schema.sales_table
+ stream: true
+ options: <options for the CatalogReader read_stream method>
+ ```
+ === "Delta Load Read"
+ ```yaml
+ Read Sales Table:
+ action: READ_CATALOG_TABLE
+ options:
+ table_identifier: my_catalog.business_schema.sales_table
+ options: <options for the CatalogReader read method>
+ delta_load_options:
+ strategy: CDF
+ delta_load_identifier: my_delta_load_id
+ strategy_options:
+ deduplication_columns: ["id"]
+ enable_full_load: true
+ ```
  """

  name: str = "READ_CATALOG_TABLE"
@@ -41,6 +72,7 @@ class ReadCatalogTableAction(PipelineAction):
  table_identifier: str | None = None,
  options: dict[str, str] | None = None,
  delta_load_options: dict[Any, Any] | DeltaLoadOptions | None = None,
+ stream: bool = False,
  **_: Any, # define kwargs to match the base class signature
  ) -> PipelineContext:
  """Reads a table from Unity Catalog using a specified table identifier and optional reader configurations.
@@ -56,6 +88,16 @@ class ReadCatalogTableAction(PipelineAction):
  behavior, such as filters or reading modes. Defaults to None.
  delta_load_options: Options for delta loading, if applicable.
  Configures the [`DeltaLoader`][cloe_nessy.integration.delta_loader].
+ behavior, such as filters or reading modes.
+ stream: If True, the action will read the table as a stream.
+ checkpoint_location: The location for storing
+ checkpoints if streaming is enabled.
+ trigger_dict: A dictionary specifying the trigger
+ configuration for the streaming query, such as processing time or
+ continuous processing.
+ behavior, such as filters or reading modes. Defaults to None.
+ delta_load_options: Options for delta loading, if applicable.
+ Configures the [`DeltaLoader`][cloe_nessy.integration.delta_loader].

  Raises:
  ValueError: If neither `table_identifier` nor `table_metadata.identifier` in the `context` is provided.
@@ -89,10 +131,31 @@ class ReadCatalogTableAction(PipelineAction):
  runtime_info=context.runtime_info or {},
  )

- table_reader = CatalogReader()
- df = table_reader.read(
+ if isinstance(delta_load_options, dict):
+ delta_options_dict = delta_load_options
+ if delta_load_options:
+ delta_load_options = DeltaLoadOptions(**delta_load_options)
+ else:
+ delta_load_options = None
+ else:
+ delta_options_dict = delta_load_options.model_dump() if delta_load_options else {}
+
+ runtime_info = set_delta_load_info(
  table_identifier=table_identifier,
- options=options,
- delta_load_options=delta_load_options,
+ delta_load_options=delta_options_dict,
+ runtime_info=context.runtime_info or {},
  )
+
+ table_reader = CatalogReader()
+
+ if stream:
+ context.runtime_info = (context.runtime_info or {}) | {"streaming": True}
+ df = table_reader.read_stream(table_identifier=table_identifier, options=options)
+ else:
+ df = table_reader.read(
+ table_identifier=table_identifier,
+ options=options,
+ delta_load_options=delta_load_options,
+ )
+
  return context.from_existing(data=df, runtime_info=runtime_info)
@@ -1,66 +1,94 @@
- import pathlib
+ from pathlib import Path
  from typing import Any

- from ...models import Schema
+ from ...models import Table
  from ..pipeline_action import PipelineAction
  from ..pipeline_context import PipelineContext


  class ReadMetadataYAMLAction(PipelineAction):
- """Reads schema metadata from a yaml file using the [`Schema`][cloe_nessy.models.schema] model.
+ """Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.

  Example:
- ```yaml
- Read Schema Metadata:
- action: READ_METADATA_YAML_ACTION
- options:
- path: excel_file_folder/excel_files_june/
- file_name: sales_schema.yml
- table_name: sales
- ```
+ === "Managed Table"
+ ```yaml
+ Read Table Metadata:
+ action: READ_METADATA_YAML_ACTION
+ options:
+ file_path: metadata/schemas/bronze/sales_table.yml
+ catalog_name: production
+ schema_name: sales_data
+ ```
+ === "External Table"
+ ```yaml
+ Read Table Metadata:
+ action: READ_METADATA_YAML_ACTION
+ options:
+ file_path: metadata/schemas/bronze/sales_table.yml
+ catalog_name: production
+ schema_name: sales_data
+ storage_path: abfs://external_storage/sales_data/sales_table
+ ```
  """

  name: str = "READ_METADATA_YAML_ACTION"

- @staticmethod
  def run(
+ self,
  context: PipelineContext,
  *,
- path: str | None = None,
- file_name: str | None = None,
- table_name: str | None = None,
+ file_path: str | None = None,
+ catalog_name: str | None = None,
+ schema_name: str | None = None,
+ storage_path: str | None = None,
  **_: Any,
  ) -> PipelineContext:
- """Reads schema metadata from a yaml file using the [`Schema`][cloe_nessy.models.schema] model.
+ """Reads table metadata from a yaml file using the [`Table`][cloe_nessy.models.table] model.

  Args:
  context: The context in which this Action is executed.
- path: The path to the data contract directory.
- file_name: The name of the file that defines the schema.
- table_name: The name of the table for which to retrieve metadata.
+ file_path: The path to the file that defines the table.
+ catalog_name: The name of the catalog for the table.
+ schema_name: The name of the schema for the table.
+ storage_path: The storage path for the table, if applicable. If not
+ provided, the table will be considered a managed table.

  Raises:
- ValueError: If any issues occur while reading the schema, such as an invalid schema,
- missing file, or missing path.
+ ValueError: If any issues occur while reading the table metadata, such as an invalid table,
+ missing file, missing path, or missing catalog/schema names.

  Returns:
  The context after the execution of this Action, containing the table metadata.
  """
- if not path:
- raise ValueError("No path provided. Please specify path to schema metadata.")
- if not file_name:
- raise ValueError("No file_name provided. Please specify file name.")
- if not table_name:
- raise ValueError("No table_name provided. Please specify table name.")
+ missing_params = []
+ if not file_path:
+ missing_params.append("file_path")
+ if not catalog_name:
+ missing_params.append("catalog_name")
+ if not schema_name:
+ missing_params.append("schema_name")

- path_obj = pathlib.Path(path)
+ if missing_params:
+ raise ValueError(
+ f"Missing required parameters: {', '.join(missing_params)}. Please specify all required parameters."
+ )

- schema, errors = Schema.read_instance_from_file(path_obj / file_name)
+ final_file_path = Path(file_path) if file_path else Path()
+
+ table, errors = Table.read_instance_from_file(
+ final_file_path,
+ catalog_name=catalog_name,
+ schema_name=schema_name,
+ )
  if errors:
- raise ValueError(f"Errors while reading schema metadata: {errors}")
- if not schema:
- raise ValueError("No schema found in metadata.")
+ raise ValueError(f"Errors while reading table metadata: {errors}")
+ if not table:
+ raise ValueError("No table found in metadata.")

- table = schema.get_table_by_name(table_name=table_name)
+ if not table.storage_path and storage_path:
+ self._console_logger.info(f"Setting storage path for table [ '{table.name}' ] to [ '{storage_path}' ]")
+ table.storage_path = storage_path
+ table.is_external = True

+ self._console_logger.info(f"Table [ '{table.name}' ] metadata read successfully from [ '{file_path}' ]")
  return context.from_existing(table_metadata=table)
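
A hedged sketch of the metadata read the reworked action now performs, mirroring the calls visible in the hunk; the file path, catalog and schema names are illustrative, and the YAML file must be parseable by the `Table` model:

```python
from pathlib import Path

from cloe_nessy.models import Table

table, errors = Table.read_instance_from_file(
    Path("metadata/schemas/bronze/sales_table.yml"),  # illustrative path
    catalog_name="production",
    schema_name="sales_data",
)
if errors:
    raise ValueError(f"Errors while reading table metadata: {errors}")
```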
@@ -1,5 +1,7 @@
  from typing import Any

+ from pyspark.sql import functions as F
+
  from ..pipeline_action import PipelineAction
  from ..pipeline_context import PipelineContext
  from ..pipeline_step import PipelineStep
@@ -13,20 +15,74 @@ class TransformJoinAction(PipelineAction):
  from [PySpark
  documentation](https://spark.apache.org/docs/latest/api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.join.html)

- Example:
- ```yaml
- Join Tables:
- action: TRANSFORM_JOIN
- options:
- joined_data: ((step:Transform First Table))
- join_on: id
- how: anti
- ```
+ Examples:
+ === "Simple Column Join"
+ ```yaml
+ Join Tables:
+ action: TRANSFORM_JOIN
+ options:
+ joined_data: ((step:Transform First Table))
+ join_on: id
+ how: inner
+ ```
+
+ === "Multiple Columns Join"
+ ```yaml
+ Join Tables:
+ action: TRANSFORM_JOIN
+ options:
+ joined_data: ((step:Transform First Table))
+ join_on: [customer_id, order_date]
+ how: left
+ ```
+
+ === "Dictionary Join (Different Column Names)"
+ ```yaml
+ Join Tables:
+ action: TRANSFORM_JOIN
+ options:
+ joined_data: ((step:Transform First Table))
+ join_on:
+ customer_id: cust_id
+ order_date: date
+ how: inner
+ ```
+
+ === "Complex Join with Literals and Expressions"
+ ```yaml
+ Join Tables:
+ action: TRANSFORM_JOIN
+ options:
+ joined_data: ((step:Load Conditions Table))
+ join_condition: |
+ left.material = right.material
+ AND right.sales_org = '10'
+ AND right.distr_chan = '10'
+ AND right.knart = 'ZUVP'
+ AND right.lovmkond <> 'X'
+ AND right.sales_unit = 'ST'
+ AND left.calday BETWEEN
+ to_date(right.date_from, 'yyyyMMdd') AND
+ to_date(right.date_to, 'yyyyMMdd')
+ how: left
+ ```

  !!! note "Referencing a DataFrame from another step"
  The `joined_data` parameter is a reference to the DataFrame from another step.
  The DataFrame is accessed using the `result` attribute of the PipelineStep. The syntax
  for referencing the DataFrame is `((step:Step Name))`, mind the double parentheses.
+
+ !!! tip "Dictionary Join Syntax"
+ When using a dictionary for `join_on`, the keys represent columns
+ from the DataFrame in context and the values represent columns from
+ the DataFrame in `joined_data`. This is useful when joining tables
+ with different column names for the same logical entity.
+
+ !!! tip "Complex Join Conditions"
+ Use `join_condition` instead of `join_on` for complex joins with literals,
+ expressions, and multiple conditions. Reference columns using `left.column_name`
+ for the main DataFrame and `right.column_name` for the joined DataFrame.
+ Supports all PySpark functions and operators.
  """

  name: str = "TRANSFORM_JOIN"
@@ -37,6 +93,7 @@ class TransformJoinAction(PipelineAction):
  *,
  joined_data: PipelineStep | None = None,
  join_on: list[str] | str | dict[str, str] | None = None,
+ join_condition: str | None = None,
  how: str = "inner",
  **_: Any,
  ) -> PipelineContext:
@@ -49,13 +106,17 @@ class TransformJoinAction(PipelineAction):
  join_on: A string for the join column
  name, a list of column names, or a dictionary mapping columns from the
  left DataFrame to the right DataFrame. This defines the condition for the
- join operation.
+ join operation. Mutually exclusive with join_condition.
+ join_condition: A string containing a complex join expression with literals,
+ functions, and multiple conditions. Use 'left.' and 'right.' prefixes
+ to reference columns from respective DataFrames. Mutually exclusive with join_on.
  how: The type of join to perform. Must be one of: inner, cross, outer,
  full, fullouter, left, leftouter, right, rightouter, semi, anti, etc.

  Raises:
  ValueError: If no joined_data is provided.
- ValueError: If no join_on is provided.
+ ValueError: If neither join_on nor join_condition is provided.
+ ValueError: If both join_on and join_condition are provided.
  ValueError: If the data from context is None.
  ValueError: If the data from the joined_data is None.

@@ -64,8 +125,12 @@ class TransformJoinAction(PipelineAction):
  """
  if joined_data is None or joined_data.result is None or joined_data.result.data is None:
  raise ValueError("No joined_data provided.")
- if not join_on:
- raise ValueError("No join_on provided.")
+
+ if not join_on and not join_condition:
+ raise ValueError("Either join_on or join_condition must be provided.")
+
+ if join_on and join_condition:
+ raise ValueError("Cannot specify both join_on and join_condition. Use one or the other.")

  if context.data is None:
  raise ValueError("Data from the context is required for the operation.")
@@ -73,16 +138,25 @@ class TransformJoinAction(PipelineAction):
  df_right = joined_data.result.data.alias("right") # type: ignore
  df_left = context.data.alias("left") # type: ignore

- if isinstance(join_on, str):
- join_condition = [join_on]
- elif isinstance(join_on, list):
- join_condition = join_on
- else:
- join_condition = [
- df_left[left_column] == df_right[right_column] # type: ignore
- for left_column, right_column in join_on.items()
- ]
-
- df = df_left.join(df_right, on=join_condition, how=how) # type: ignore
+ if join_condition:
+ try:
+ condition = F.expr(join_condition)
+ except Exception as e:
+ # this will not raise an error in most cases, because the evaluation of the expression is lazy
+ raise ValueError(f"Failed to parse join condition '{join_condition}': {str(e)}") from e
+ df = df_left.join(df_right, on=condition, how=how) # type: ignore
+
+ if join_on:
+ if isinstance(join_on, str):
+ join_condition_list = [join_on]
+ elif isinstance(join_on, list):
+ join_condition_list = join_on
+ else:
+ join_condition_list = [
+ df_left[left_column] == df_right[right_column] # type: ignore
+ for left_column, right_column in join_on.items()
+ ]
+
+ df = df_left.join(df_right, on=join_condition_list, how=how) # type: ignore

  return context.from_existing(data=df) # type: ignore
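
A self-contained sketch of the join style the new `join_condition` option maps to: both DataFrames are aliased as `left`/`right` and the condition string is parsed with `F.expr`. The sample columns echo the docstring example; the data is made up:

```python
from pyspark.sql import SparkSession, functions as F

spark = SparkSession.builder.getOrCreate()
left = spark.createDataFrame(
    [(100, "20240615")], ["material", "calday"]
).alias("left")
right = spark.createDataFrame(
    [(100, "10", "20240101", "20241231")],
    ["material", "sales_org", "date_from", "date_to"],
).alias("right")

condition = F.expr(
    "left.material = right.material "
    "AND right.sales_org = '10' "
    "AND left.calday BETWEEN right.date_from AND right.date_to"
)
left.join(right, on=condition, how="left").show()
```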
@@ -22,8 +22,8 @@ class TransformUnionAction(PipelineAction):
  action: TRANSFORM_UNION
  options:
  union_data:
- - ((step: Filter First Table))
- - ((step: SQL Transform Second Table))
+ - ((step:Filter First Table))
+ - ((step:SQL Transform Second Table))
  ```
  !!! note "Referencing a DataFrame from another step"
  The `union_data` parameter is a reference to the DataFrame from another step.
@@ -2,6 +2,7 @@ from typing import Any

  from ...integration.delta_loader import consume_delta_load
  from ...integration.writer import CatalogWriter
+ from ...object_manager import TableManager
  from ..pipeline_action import PipelineAction
  from ..pipeline_context import PipelineContext

@@ -9,17 +10,31 @@ from ..pipeline_context import PipelineContext
  class WriteCatalogTableAction(PipelineAction):
  """Writes a DataFrame to a specified catalog table using [CatalogWriter][cloe_nessy.integration.writer.CatalogWriter].

- Example:
- ```yaml
- Write Table to Catalog:
- action: WRITE_CATALOG_TABLE
- options:
- table_identifier: my_catalog.business_schema.sales_table
- mode: append
- partition_by: day
+ Examples:
+ === "Batch Write"
+ ```yaml
+ Write Table to Catalog:
+ action: WRITE_CATALOG_TABLE
  options:
- mergeSchema: true
- ```
+ table_identifier: my_catalog.business_schema.sales_table
+ mode: append
+ partition_by: day
+ options:
+ mergeSchema: true
+ ```
+ === "Streaming Write"
+ ```yaml
+ Write Table to Catalog Stream:
+ action: WRITE_CATALOG_TABLE
+ options:
+ table_identifier: my_catalog.business_schema.sales_table
+ mode: append
+ checkpoint_location: /path/to/checkpoint
+ trigger_dict:
+ processingTime: 10 seconds
+ options:
+ mergeSchema: true
+ ```
  """

  name: str = "WRITE_CATALOG_TABLE"
@@ -32,6 +47,9 @@ class WriteCatalogTableAction(PipelineAction):
  mode: str = "append",
  partition_by: str | list[str] | None = None,
  options: dict[str, str] | None = None,
+ checkpoint_location: str | None = None,
+ trigger_dict: dict | None = None,
+ await_termination: bool = False,
  **_: Any,
  ) -> PipelineContext:
  """Writes a DataFrame to a specified catalog table.
@@ -44,7 +62,11 @@ class WriteCatalogTableAction(PipelineAction):
  mode: The write mode. One of 'append', 'overwrite', 'error',
  'errorifexists', or 'ignore'.
  partition_by: Names of the partitioning columns.
- options: PySpark options for the DataFrame.saveAsTable operation (e.g. mergeSchema:true).
+ checkpoint_location: Location for checkpointing.
+ trigger_dict: A dictionary specifying the trigger configuration for the streaming query.
+ await_termination: If True, the function will wait for the streaming
+ query to finish before returning.
+ options: Additional options for the DataFrame write operation.

  Raises:
  ValueError: If the table name is not specified or cannot be inferred from
@@ -55,25 +77,48 @@ class WriteCatalogTableAction(PipelineAction):
  """
  if not options:
  options = dict()
- if partition_by is None:
- if hasattr(context.table_metadata, "partition_by"):
- partition_by = context.table_metadata.partition_by # type: ignore
+ streaming = context.runtime_info and context.runtime_info.get("streaming")
+ if streaming and not checkpoint_location:
+ raise ValueError("Checkpoint location must be specified for streaming writes.")
+ if (
+ partition_by is None
+ and context.table_metadata is not None
+ and hasattr(context.table_metadata, "partition_by")
+ and not context.table_metadata.liquid_clustering
+ ):
+ partition_by = context.table_metadata.partition_by # type: ignore

  if (table_metadata := context.table_metadata) and table_identifier is None:
  table_identifier = table_metadata.identifier
  if table_identifier is None:
  raise ValueError("Table name must be specified or a valid Table object with identifier must be set.")

+ if table_metadata:
+ manager = TableManager()
+ manager.create_table(table=table_metadata, ignore_if_exists=True, replace=False)
+
  runtime_info = getattr(context, "runtime_info", None)
  if runtime_info and runtime_info.get("is_delta_load"):
  consume_delta_load(runtime_info)

  writer = CatalogWriter()
- writer.write_table(
- df=context.data, # type: ignore
- table_identifier=table_identifier,
- mode=mode,
- partition_by=partition_by,
- options=options,
- )
+
+ if streaming:
+ writer.write_stream(
+ df=context.data, # type: ignore
+ table_identifier=table_identifier,
+ checkpoint_location=checkpoint_location,
+ trigger_dict=trigger_dict,
+ options=options,
+ mode=mode,
+ await_termination=await_termination,
+ )
+ else:
+ writer.write(
+ df=context.data, # type: ignore
+ table_identifier=table_identifier,
+ mode=mode,
+ partition_by=partition_by,
+ options=options,
+ )
  return context.from_existing()
@@ -117,6 +117,7 @@ class WriteDeltaMergeAction(PipelineAction):

  delta_merge_writer.write(
  table_identifier=context.table_metadata.identifier,
+ table=context.table_metadata,
  storage_path=str(context.table_metadata.storage_path),
  data_frame=context.data,
  key_columns=key_columns,
@@ -1,36 +1,36 @@
  Metadata-Version: 2.4
  Name: cloe-nessy
- Version: 0.3.17.0
+ Version: 0.3.18
  Summary: Your friendly datalake monster.
+ Project-URL: homepage, https://initions.com/
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
  License: MIT
- Project-URL: homepage, https://initions.com/
  Classifier: Development Status :: 5 - Production/Stable
  Classifier: Environment :: Console
- Classifier: License :: OSI Approved :: MIT License
  Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
  Classifier: Operating System :: OS Independent
  Classifier: Programming Language :: Python :: 3
  Classifier: Topic :: Database
  Requires-Python: <3.13,>=3.11
- Description-Content-Type: text/markdown
- Requires-Dist: pydantic<3.0.0,>=2.7.2
- Requires-Dist: pyyaml<7.0.0,>=6.0.1
- Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
- Requires-Dist: jinja2<4.0.0,>=3.1.4
- Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
- Requires-Dist: openpyxl<4.0.0,>=3.1.5
- Requires-Dist: requests<3.0.0,>=2.32.3
- Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
- Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
  Requires-Dist: azure-identity<2.0.0,>=1.19.0
- Requires-Dist: httpx<1.0.0,>=0.27.2
+ Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
  Requires-Dist: databricks-sdk<1.0.0,>=0.36.0
- Requires-Dist: networkx<4.0,>=3.3
+ Requires-Dist: fsspec<2025.7.1,>=2025.7.0
+ Requires-Dist: httpx<1.0.0,>=0.27.2
+ Requires-Dist: jinja2<4.0.0,>=3.1.4
  Requires-Dist: matplotlib<4.0.0,>=3.9.2
+ Requires-Dist: networkx<4.0,>=3.3
+ Requires-Dist: openpyxl<4.0.0,>=3.1.5
+ Requires-Dist: pandas-stubs<3.0.0.0,>=2.2.2.240807
+ Requires-Dist: pydantic-settings<3.0.0,>=2.4.0
+ Requires-Dist: pydantic<3.0.0,>=2.7.2
+ Requires-Dist: pyyaml<7.0.0,>=6.0.1
+ Requires-Dist: requests<3.0.0,>=2.32.3
  Requires-Dist: types-networkx<4.0.0.0,>=3.2.1.20240820
- Requires-Dist: fsspec<2025.7.1,>=2025.7.0
- Requires-Dist: cloe-logging[databricks,log-analytics]<0.4,>=0.3.8
+ Requires-Dist: types-pyyaml<7.0.0.0,>=6.0.12.20240311
+ Requires-Dist: types-requests<3.0.0.0,>=2.32.0.20240712
+ Description-Content-Type: text/markdown

  # cloe-nessy

@@ -19,7 +19,7 @@ cloe_nessy/file_utilities/strategies/utils_strategy.py,sha256=urayKfOUpSaXKgTs1K
  cloe_nessy/integration/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cloe_nessy/integration/delta_loader/__init__.py,sha256=ZdBDde1uPtTCL_KAhilVmtVmmGvH5dHb05QsOozkteE,438
  cloe_nessy/integration/delta_loader/delta_load_options.py,sha256=bbPGhC0n8L6CmcmV91Xqq6fWRimxlUHUkr22uVqG0g4,1363
- cloe_nessy/integration/delta_loader/delta_loader.py,sha256=D5oOvVLWRwl0z0iQScXVOapErAl6Z5Kt3qXedchgq0s,6878
+ cloe_nessy/integration/delta_loader/delta_loader.py,sha256=WOl44Udvo6hZ5PVFgabpehs8tt5nl9AYyDnnYBba5Ck,6872
  cloe_nessy/integration/delta_loader/delta_loader_factory.py,sha256=vB1cL6-Nc3SkLH1xtazMbMF1MnNYq8-g3GHZzRE3QmE,2251
  cloe_nessy/integration/delta_loader/delta_loader_metadata_table.py,sha256=G_EWUY76ZlbsPZB9LCGlOLVezk7DK6peYXEgt7-sTQE,1683
  cloe_nessy/integration/delta_loader/strategies/__init__.py,sha256=1o5fRWenL5KnUg1hf7kmTuTpG9pbMxchiQTub52Qvwo,255
@@ -27,18 +27,18 @@ cloe_nessy/integration/delta_loader/strategies/delta_cdf_loader.py,sha256=FOOZqt
  cloe_nessy/integration/delta_loader/strategies/delta_timestamp_loader.py,sha256=YYFH0DkdRPvITUc1JMgkmgIHjwDyZDCjqvEk2qhBMfE,6185
  cloe_nessy/integration/reader/__init__.py,sha256=J5vlORqHLBpHEvzIwfIjzN5xEdOat-8jlmdLcGj8nsA,239
  cloe_nessy/integration/reader/api_reader.py,sha256=3Mf-txOTJ1dXCzdNtRTLC8UKftKms4NxOoLVgzcc2eo,5691
- cloe_nessy/integration/reader/catalog_reader.py,sha256=7jFuqIPpuz03opULh2I0TCLPfW6AqkxjaW2kCc0oM1g,3292
+ cloe_nessy/integration/reader/catalog_reader.py,sha256=w-oUHpyiIwJppa-BW5E_HaMxpNgVWaCQVNSTvuEr9qA,4815
  cloe_nessy/integration/reader/excel_reader.py,sha256=8KCqKBYFE6RGCiahJimQOAtbYZzaUzlnoslW9yca5P8,8035
  cloe_nessy/integration/reader/exceptions.py,sha256=_A9jFpe_RIDZCGY76qzjic9bsshxns6yXPSl141dq1c,203
  cloe_nessy/integration/reader/file_reader.py,sha256=Za_DZKUq1vATp8kIS8uY9IDHiaReZO0k80rrPHAhi5A,8132
  cloe_nessy/integration/reader/reader.py,sha256=e2KVPePQme8SBQJEbL-3zpGasOgTiEvKFTslow2wGPw,1034
  cloe_nessy/integration/writer/__init__.py,sha256=3yzCAGiWZdQWtsbzlTih01sxVTJV2DDYwvl34lEAUlE,243
- cloe_nessy/integration/writer/catalog_writer.py,sha256=Gb-hMdADgO_uUJ7mZPHBYyNme2qXsdFFnzwo7GcShHM,2192
+ cloe_nessy/integration/writer/catalog_writer.py,sha256=Z26FOL3D9KK6I7Y3rgl4c88rToKZnVXlELTYH2xQsHY,5289
  cloe_nessy/integration/writer/file_writer.py,sha256=SUDbN13ZzDhbM8DpOGFgM_Gkg70To4L6Q182pXx2HRM,5454
  cloe_nessy/integration/writer/writer.py,sha256=elFPLFrWR-qVE9qnBtzzzhyRALLQcRVuOsPS0rNmRt4,1741
  cloe_nessy/integration/writer/delta_writer/__init__.py,sha256=h2CT6Hllmk0nodlek27uqwniCzVZKMkYcPGyG9K2Z24,164
  cloe_nessy/integration/writer/delta_writer/delta_append_writer.py,sha256=TbpW-j87_H9dcUza34uR6VWslJez406y3_5N1ip0SnM,4740
- cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=no2GOLqMAJd0fEy2mqMevMj_CvutcJPRmXJC2tD4icA,10112
+ cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py,sha256=Yp_q_ycasW2_wwmzty_6fZeBVcW_0o8gLrr6F1gaUjQ,10195
  cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py,sha256=m4YFY9_WgaOcnpBviVt3Km-w3wf3NF25wPS-n0NBGcE,970
  cloe_nessy/integration/writer/delta_writer/delta_writer_base.py,sha256=upUtDZMzwYFU0kzmkelVgkpFToXkrypcR3h_jvGjz14,8596
  cloe_nessy/integration/writer/delta_writer/exceptions.py,sha256=xPmGiYV0xQXauln5Oh34E5vbm0rVcs6xCh-SJSb2bw0,107
@@ -46,10 +46,10 @@ cloe_nessy/logging/__init__.py,sha256=ySVCVbdyR3Dno_tl2ZfiER_7EVaDoQMHVkNyfdMZum
  cloe_nessy/logging/logger_mixin.py,sha256=H8MyMEyb_kEDP0Ow5QStAFLuOkTIeUnneGaj916fKlU,7443
  cloe_nessy/models/__init__.py,sha256=-FmWEJ1Oq1njSopjc0R7GmT64mLSmALkm8PkHNzy9Y8,327
  cloe_nessy/models/catalog.py,sha256=ayC1sMp4cNLAZtu0ICVV3Us6-o4hn8U9tpzzvxC9RAs,177
- cloe_nessy/models/column.py,sha256=t-MX9GMs7l5W0APvsUxiE1TI9SWkKdFKblmz24s4IHY,1995
+ cloe_nessy/models/column.py,sha256=W4V1Ls1d60VyZ1Ko9Yu9eSipcMbxSzKicn0aloHPiR0,2027
  cloe_nessy/models/constraint.py,sha256=hsFlhn4n928z81O3dl3v5bMetewPWzMjkJK3_4kASSM,178
  cloe_nessy/models/foreign_key.py,sha256=DwRVHs9sShqqPV-NL7ow_3AmPPWX0Od26yZn_I565pU,1001
- cloe_nessy/models/schema.py,sha256=yUrjjEhAH5zbCymE67Az_jPnVB8hGO-_UNfqzeZCD_Y,3376
+ cloe_nessy/models/schema.py,sha256=cNSrH7K4hLRrkg1E6fW6DUIBMZdR2A5B21POj5iQ4GA,3429
  cloe_nessy/models/table.py,sha256=3AUBUKLJv1x-xN9KYc5Ndjf-lAlT83rUYdhRKy8wFU4,12105
  cloe_nessy/models/types.py,sha256=XRbuJGdTNa6aXyE3IAzs_J9gVjbfkzMDLfGl-k6jI_4,223
  cloe_nessy/models/volume.py,sha256=51BE06FrL1Wv6zblFwJ_HTiR6WQqH7pSmrdH90rqwLg,2444
@@ -58,10 +58,10 @@ cloe_nessy/models/adapter/unity_catalog_adapter.py,sha256=a-14Ys-AevVYQd0xeJU1sy
  cloe_nessy/models/mixins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cloe_nessy/models/mixins/read_instance_mixin.py,sha256=j5Y4aNWOh1jlskEaxNooZFJgPyxRmik00gAVLJnAaRs,4507
  cloe_nessy/models/mixins/template_loader_mixin.py,sha256=5MXhEGBFlq3dwZvINEyBowSlipNnVun2H_TmhI_fsS4,549
- cloe_nessy/models/templates/create_table.sql.j2,sha256=QWbiTXwmGaIlZUAIGL4pAlHkDbP9mq1vGAkdKCPOqm4,1669
+ cloe_nessy/models/templates/create_table.sql.j2,sha256=71JpUyUZ_ZYO2M0tfIrTXHR7JycypAGsELt2-2d3oO0,2479
  cloe_nessy/models/templates/create_volume.sql.j2,sha256=XIUf1cHcvAxcGTyhzUiv4xpQ1cfDw_ra3_FKmOuLoBs,289
  cloe_nessy/object_manager/__init__.py,sha256=3sle0vNpPwBOkycxA3XVS9m4XZf5LD3Qd4NGxdqcHno,186
- cloe_nessy/object_manager/table_manager.py,sha256=m6u_KFYCPoqq1hagwt3s7eQopjV2oOJNlmXDVAfku-k,12703
+ cloe_nessy/object_manager/table_manager.py,sha256=4eQG-zMiuBpeJmvWdL3KdhHRiPFf8TS0RFNRp8Yz6rY,13887
  cloe_nessy/object_manager/volume_manager.py,sha256=6epd3KXzcNH04EvaKubAfLsaUm9qBMeT3KNvMK04gGs,2727
  cloe_nessy/pipeline/__init__.py,sha256=sespmJ5JsgyiFyZiedTiL2kg--zGIX7cjTYsD5vemEg,325
  cloe_nessy/pipeline/pipeline.py,sha256=L4wk3b06LNWRj01nnAkuQpeRrwFTyaV1xTpgYAg4sak,10819
@@ -73,10 +73,10 @@ cloe_nessy/pipeline/pipeline_plotting_service.py,sha256=goMQj73FzUVchKn5c2SsPcWR
  cloe_nessy/pipeline/pipeline_step.py,sha256=UlnmpS6gm_dZ7m9dD1mZvye7mvUF_DA7HjOZo0oGYDU,1977
  cloe_nessy/pipeline/actions/__init__.py,sha256=RZ1UVSn9v88F4GKgHy6UYDzx8zSAMQScJLCeiHO5f8A,2802
  cloe_nessy/pipeline/actions/read_api.py,sha256=RBv5XeHtjTXuCP09Fqo6JNx6iIhQQI-nuAHCuSaGs2s,7778
- cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=TBlJaXJAQwLtwvh7dXsX9ebNN3rS6En6951MnT8xGG8,4101
+ cloe_nessy/pipeline/actions/read_catalog_table.py,sha256=EkP3JSI7VQMkvUsb97ieUeGnnfvyyUI7egvqNWMqK0I,6894
  cloe_nessy/pipeline/actions/read_excel.py,sha256=Mhl3r_2Hqk2XN7Fl5WqqAyE4JdnwSiivbhWMglyBtkE,7961
  cloe_nessy/pipeline/actions/read_files.py,sha256=hRcM7wG35vxxLVajW3SK5euHW02qxiXCYSkIl11xiQ0,7308
- cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=3ZDy9qiDYtM1oDQzHPC23hLOvHjhdk5zg1wVHE60m9k,2295
+ cloe_nessy/pipeline/actions/read_metadata_yaml.py,sha256=i8fQceV63eAqx_x0ANisCkXWfMHyhqsfFHVFH5yP2po,3544
  cloe_nessy/pipeline/actions/transform_change_datatype.py,sha256=24Tn6R3TvUkWCh8V6naLdyNbCbqvyPOOoer-hy_Ebq4,2077
  cloe_nessy/pipeline/actions/transform_clean_column_names.py,sha256=VxvWqENW63c50L96JA1V_ioe4By6gGzx_iY86njOXEM,3044
  cloe_nessy/pipeline/actions/transform_concat_columns.py,sha256=Nk8YbhxDnFZsWzW9Dj5Yl76Uq6VrcMlevQPHGms65L8,3777
@@ -88,15 +88,15 @@ cloe_nessy/pipeline/actions/transform_filter.py,sha256=Nz_ggRfKIcNzYFfFOsgq1Qeat
  cloe_nessy/pipeline/actions/transform_generic_sql.py,sha256=_naWfmPdYAUKjPNeHu5qJAohOL7DHCSYz_kwoeRv3OI,2741
  cloe_nessy/pipeline/actions/transform_group_aggregate.py,sha256=KUHeeP-RIDi34dpbsPEJkzea5zFJA6MuyjNpOsFud9o,4045
  cloe_nessy/pipeline/actions/transform_hash_columns.py,sha256=H8j_Xadnm3npVNA_nu7Be7v0bJV20ELKMxSsVHHl6CY,8407
- cloe_nessy/pipeline/actions/transform_join.py,sha256=e_tvMk8YJTAWcUK_EmOgNt0s31ICZoMX_MKOTWx4lBY,3645
+ cloe_nessy/pipeline/actions/transform_join.py,sha256=ez1M1wVc9khOZj1swMArJbBKXxEpjenUHrW1wL8H330,7200
  cloe_nessy/pipeline/actions/transform_json_normalize.py,sha256=petF7pnNq1EKc8MqVdG0weFALAHNILSe_eAu4Z5XxIo,4833
  cloe_nessy/pipeline/actions/transform_rename_columns.py,sha256=4zJcPCONMU4C67qeuzsrX3AORRRHoq_selUI7FJyeg0,1952
  cloe_nessy/pipeline/actions/transform_replace_values.py,sha256=1OPHTrjcphfyGepcO7ozYfeqfwA18pjlyHpVKUS_AAU,2049
  cloe_nessy/pipeline/actions/transform_select_columns.py,sha256=-GhSEsb7iNnZIsYRm3BG9BX4_qUDJMbpj1DsKPY046w,4574
- cloe_nessy/pipeline/actions/transform_union.py,sha256=s81Vge0AbYPc7VkskCYfOQ_LEjqcmfNFyDkytfjcZyo,2720
- cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=vZ7bZcrZY47P_EVYNshMNZ34l7Orhs8Q9--5Ud5hhLI,2906
+ cloe_nessy/pipeline/actions/transform_union.py,sha256=SZtEzh567CIExUj9yMEgshE28h4dXKT7Wr2TDj4zB4k,2718
+ cloe_nessy/pipeline/actions/write_catalog_table.py,sha256=FyC0scQU8Ul3Uigpk6IN2IJpf_4jRjAqF5yHtDVwG00,4852
  cloe_nessy/pipeline/actions/write_delta_append.py,sha256=2F5qnKPsY_F-2672Ce4Gub7qdna157jEqHHc429fO2A,2962
- cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=kQE4xLbVEUnpYImZLnpZxp88Tuf6VNSeU1W-zI8Wuvw,5805
+ cloe_nessy/pipeline/actions/write_delta_merge.py,sha256=zcOk4ytZFUxyGY8U2fdFPLFnw2g_yhaS_vOx_e3wCuE,5847
  cloe_nessy/pipeline/actions/write_file.py,sha256=JZ8UZslxUn_ttYt5wDyvtHFq2FqYk3vOR8kvExJI8pk,3212
  cloe_nessy/pipeline/utils/__init__.py,sha256=xi02UjBMiXWD7b9gDvww4gyRyowb0eRd_6Wbu0F_cro,118
  cloe_nessy/pipeline/utils/delta_load_utils.py,sha256=KitMNruxePEkecI0h4Jint1JwJpaEog5mCOchMkgan8,1495
@@ -107,7 +107,6 @@ cloe_nessy/settings/settings.py,sha256=I4n129lrujriW-d8q4as2Kb4_kI932ModfZ5Ow_Up
  cloe_nessy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  cloe_nessy/utils/column_names.py,sha256=dCNtm61mc5aLkY2oE4rlfN3VLCrpot6fOESjAZmCmhA,361
  cloe_nessy/utils/file_and_directory_handler.py,sha256=r2EVt9xG81p6ScaJCwETC5an6pMT6WseB0jMOR-JlpU,602
- cloe_nessy-0.3.17.0.dist-info/METADATA,sha256=hR0GqdboYwzBrbZY_ese9kt250DIOHgMlAj3QOqLhF8,3292
- cloe_nessy-0.3.17.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- cloe_nessy-0.3.17.0.dist-info/top_level.txt,sha256=Z7izn8HmQpg2wBUb-0jzaKlYKMU7Ypzuc9__9vPtW_I,11
- cloe_nessy-0.3.17.0.dist-info/RECORD,,
+ cloe_nessy-0.3.18.dist-info/METADATA,sha256=Sc5JD6FrXR1GwPA9VHv4guNxs-hPHa9GBZz31zOQbL8,3290
+ cloe_nessy-0.3.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ cloe_nessy-0.3.18.dist-info/RECORD,,
@@ -1,5 +1,4 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: hatchling 1.27.0
  Root-Is-Purelib: true
  Tag: py3-none-any
-
@@ -1 +0,0 @@
- cloe_nessy