cloe-nessy 0.3.5__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. cloe_nessy/__init__.py +0 -0
  2. cloe_nessy/clients/__init__.py +0 -0
  3. cloe_nessy/clients/api_client/__init__.py +0 -0
  4. cloe_nessy/clients/api_client/api_client.py +0 -0
  5. cloe_nessy/clients/api_client/api_response.py +0 -0
  6. cloe_nessy/clients/api_client/auth.py +0 -0
  7. cloe_nessy/clients/api_client/exceptions.py +0 -0
  8. cloe_nessy/file_utilities/__init__.py +0 -0
  9. cloe_nessy/file_utilities/exceptions.py +0 -0
  10. cloe_nessy/file_utilities/factory.py +0 -0
  11. cloe_nessy/file_utilities/get_file_paths.py +0 -0
  12. cloe_nessy/file_utilities/location_types.py +0 -0
  13. cloe_nessy/file_utilities/strategies/__init__.py +0 -0
  14. cloe_nessy/file_utilities/strategies/base_strategy.py +0 -0
  15. cloe_nessy/file_utilities/strategies/local_strategy.py +0 -0
  16. cloe_nessy/file_utilities/strategies/onelake_strategy.py +0 -0
  17. cloe_nessy/file_utilities/strategies/utils_strategy.py +0 -0
  18. cloe_nessy/integration/__init__.py +0 -0
  19. cloe_nessy/integration/reader/__init__.py +0 -0
  20. cloe_nessy/integration/reader/api_reader.py +4 -2
  21. cloe_nessy/integration/reader/catalog_reader.py +6 -3
  22. cloe_nessy/integration/reader/excel_reader.py +1 -1
  23. cloe_nessy/integration/reader/exceptions.py +0 -0
  24. cloe_nessy/integration/reader/file_reader.py +78 -5
  25. cloe_nessy/integration/reader/reader.py +0 -0
  26. cloe_nessy/integration/writer/__init__.py +8 -1
  27. cloe_nessy/integration/writer/catalog_writer.py +0 -0
  28. cloe_nessy/integration/writer/delta_writer/__init__.py +7 -0
  29. cloe_nessy/integration/writer/delta_writer/delta_append_writer.py +108 -0
  30. cloe_nessy/integration/writer/delta_writer/delta_merge_writer.py +215 -0
  31. cloe_nessy/integration/writer/delta_writer/delta_table_operation_type.py +21 -0
  32. cloe_nessy/integration/writer/delta_writer/delta_writer_base.py +210 -0
  33. cloe_nessy/integration/writer/delta_writer/exceptions.py +4 -0
  34. cloe_nessy/integration/writer/file_writer.py +132 -0
  35. cloe_nessy/integration/writer/writer.py +54 -0
  36. cloe_nessy/logging/__init__.py +0 -0
  37. cloe_nessy/logging/logger_mixin.py +0 -0
  38. cloe_nessy/models/__init__.py +4 -0
  39. cloe_nessy/models/adapter/__init__.py +3 -0
  40. cloe_nessy/models/adapter/unity_catalog_adapter.py +296 -0
  41. cloe_nessy/models/catalog.py +10 -0
  42. cloe_nessy/models/column.py +0 -0
  43. cloe_nessy/models/constraint.py +0 -0
  44. cloe_nessy/models/foreign_key.py +0 -0
  45. cloe_nessy/models/mixins/__init__.py +0 -0
  46. cloe_nessy/models/mixins/read_instance_mixin.py +0 -0
  47. cloe_nessy/models/mixins/template_loader_mixin.py +0 -0
  48. cloe_nessy/models/schema.py +20 -1
  49. cloe_nessy/models/table.py +67 -11
  50. cloe_nessy/models/types.py +0 -0
  51. cloe_nessy/models/volume.py +67 -0
  52. cloe_nessy/object_manager/__init__.py +7 -2
  53. cloe_nessy/object_manager/table_manager.py +251 -21
  54. cloe_nessy/object_manager/volume_manager.py +70 -0
  55. cloe_nessy/pipeline/__init__.py +0 -0
  56. cloe_nessy/pipeline/actions/__init__.py +9 -1
  57. cloe_nessy/pipeline/actions/read_api.py +0 -0
  58. cloe_nessy/pipeline/actions/read_catalog_table.py +1 -4
  59. cloe_nessy/pipeline/actions/read_excel.py +0 -0
  60. cloe_nessy/pipeline/actions/read_files.py +0 -0
  61. cloe_nessy/pipeline/actions/read_metadata_yaml.py +0 -0
  62. cloe_nessy/pipeline/actions/transform_change_datatype.py +0 -0
  63. cloe_nessy/pipeline/actions/transform_clean_column_names.py +0 -0
  64. cloe_nessy/pipeline/actions/transform_concat_columns.py +0 -0
  65. cloe_nessy/pipeline/actions/transform_decode.py +0 -0
  66. cloe_nessy/pipeline/actions/transform_deduplication.py +0 -0
  67. cloe_nessy/pipeline/actions/transform_distinct.py +0 -0
  68. cloe_nessy/pipeline/actions/transform_filter.py +0 -0
  69. cloe_nessy/pipeline/actions/transform_generic_sql.py +0 -0
  70. cloe_nessy/pipeline/actions/transform_group_aggregate.py +0 -0
  71. cloe_nessy/pipeline/actions/transform_hash_columns.py +209 -0
  72. cloe_nessy/pipeline/actions/transform_join.py +0 -0
  73. cloe_nessy/pipeline/actions/transform_json_normalize.py +0 -0
  74. cloe_nessy/pipeline/actions/transform_rename_columns.py +0 -0
  75. cloe_nessy/pipeline/actions/transform_replace_values.py +0 -0
  76. cloe_nessy/pipeline/actions/transform_select_columns.py +0 -0
  77. cloe_nessy/pipeline/actions/transform_union.py +0 -0
  78. cloe_nessy/pipeline/actions/write_catalog_table.py +0 -0
  79. cloe_nessy/pipeline/actions/write_delta_append.py +69 -0
  80. cloe_nessy/pipeline/actions/write_delta_merge.py +118 -0
  81. cloe_nessy/pipeline/actions/write_file.py +94 -0
  82. cloe_nessy/pipeline/pipeline.py +44 -2
  83. cloe_nessy/pipeline/pipeline_action.py +0 -0
  84. cloe_nessy/pipeline/pipeline_config.py +0 -0
  85. cloe_nessy/pipeline/pipeline_context.py +0 -0
  86. cloe_nessy/pipeline/pipeline_parsing_service.py +0 -0
  87. cloe_nessy/pipeline/pipeline_step.py +0 -0
  88. cloe_nessy/py.typed +0 -0
  89. cloe_nessy/session/__init__.py +0 -0
  90. cloe_nessy/session/session_manager.py +27 -0
  91. cloe_nessy/settings/__init__.py +0 -0
  92. cloe_nessy/settings/settings.py +0 -0
  93. cloe_nessy/utils/__init__.py +0 -0
  94. cloe_nessy/utils/file_and_directory_handler.py +0 -0
  95. cloe_nessy-0.3.9.dist-info/METADATA +70 -0
  96. {cloe_nessy-0.3.5.dist-info → cloe_nessy-0.3.9.dist-info}/RECORD +35 -18
  97. {cloe_nessy-0.3.5.dist-info → cloe_nessy-0.3.9.dist-info}/WHEEL +1 -1
  98. {cloe_nessy-0.3.5.dist-info → cloe_nessy-0.3.9.dist-info}/top_level.txt +0 -0
  99. cloe_nessy-0.3.5.dist-info/METADATA +0 -26
@@ -0,0 +1,296 @@
1
+ from pyspark.sql import SparkSession
2
+ from pyspark.sql import functions as F
3
+
4
+ from cloe_nessy.logging.logger_mixin import LoggerMixin
5
+ from cloe_nessy.models import ForeignKey
6
+
7
+ from ...session import SessionManager
8
+ from ..catalog import Catalog
9
+ from ..column import Column
10
+ from ..schema import Schema
11
+ from ..table import Table
12
+
13
+
14
+ class UnityCatalogAdapter(LoggerMixin):
15
+ """Acts as a translator between Unity Catalog metadata and Nessy Models."""
16
+
17
+ def __init__(self, spark: SparkSession | None = None):
18
+ """Initializes the UnityCatalogAdapter class."""
19
+ self._spark = spark or SessionManager.get_spark_session()
20
+ self._console_logger = self.get_console_logger()
21
+ self._catalogs = self.get_catalogs()
22
+
23
+ def _execute_sql(self, query):
24
+ """Execute a SQL query and return a DataFrame.
25
+
26
+ This wrapper is used for better testability.
27
+
28
+ Returns:
29
+ The resulting DataFrame after executing the SQL query.
30
+ """
31
+ return self._spark.sql(query)
32
+
33
+ def get_catalogs(self) -> list[Catalog]:
34
+ """Retrieve a list of catalogs with their associated metadata.
35
+
36
+ Returns:
37
+ A list of `Catalog` objects.
38
+ """
39
+ df = self._execute_sql("SHOW CATALOGS")
40
+ catalogs = []
41
+ for catalog in df.collect():
42
+ name = catalog["catalog"]
43
+ catalog_metadata = self._execute_sql(f"DESCRIBE CATALOG EXTENDED {name}")
44
+ pivoted_metadata = catalog_metadata.withColumn("dummy", F.lit("dummy"))
45
+ pivoted_df = pivoted_metadata.groupBy("dummy").pivot("info_name").agg(F.first("info_value"))
46
+ catalog_owner = pivoted_df.collect()[0]["Owner"]
47
+ comment = pivoted_df.collect()[0]["Comment"]
48
+ catalogs.append(Catalog(name=name, owner=catalog_owner, comment=comment))
49
+ return catalogs
50
+
51
+ def get_catalog_by_name(self, name: str) -> Catalog | None:
52
+ """Returns a Catalog by its name.
53
+
54
+ Args:
55
+ name: The name of the Catalog.
56
+
57
+ Returns:
58
+ The Catalog with the specified name.
59
+ """
60
+ for catalog in self._catalogs:
61
+ if catalog.name == name:
62
+ return catalog
63
+ self._console_logger.warning(f"No catalog found with name: {name}")
64
+ return None
65
+
66
+ def get_catalog_schemas(self, catalog: str | Catalog) -> list[Schema]:
67
+ """Collects all schemas in a given catalog.
68
+
69
+ Args:
70
+ catalog: The catalog from which the schemas are to be collected.
71
+
72
+ Returns:
73
+ A list of `Schema` objects.
74
+ """
75
+ schemas = []
76
+ if isinstance(catalog, Catalog):
77
+ catalog = catalog.name
78
+ schemas_df = self._execute_sql(f"SELECT * FROM {catalog}.information_schema.schemata").collect()
79
+
80
+ for schema in schemas_df:
81
+ schemas.append(
82
+ Schema(
83
+ name=schema["schema_name"],
84
+ catalog=catalog,
85
+ comment=schema["comment"],
86
+ ),
87
+ )
88
+ return schemas
89
+
90
+ def get_schema_by_name(self, catalog: str | Catalog, name: str) -> Schema | None:
91
+ """Retrieve a schema by its name from a specified catalog.
92
+
93
+ Args:
94
+ catalog: The catalog from which to retrieve the schema.
95
+ This can be either a string representing the catalog name or a
96
+ `Catalog` object.
97
+ name: The name of the schema to retrieve.
98
+
99
+ Returns:
100
+ The `Schema` object if found, otherwise `None`.
101
+ """
102
+ if isinstance(catalog, Catalog):
103
+ catalog = catalog.name
104
+ schemas = self.get_catalog_schemas(catalog)
105
+ for schema in schemas:
106
+ if schema.name == name:
107
+ schema = self.add_tables_to_schema(catalog, schema)
108
+ return schema
109
+
110
+ self._console_logger.warning(f"No Schema in Catalog [ '{catalog}' ] found with name [ '{name}' ]")
111
+ return None
112
+
113
+ def get_table_by_name(self, table_identifier: str) -> Table | None:
114
+ """Retrieve a table by its name."""
115
+ if len(table_identifier.split(".")) != 3:
116
+ raise ValueError("The identifier must be in the format 'catalog.schema.table'")
117
+
118
+ catalog_name, schema_name, table_name = table_identifier.split(".")
119
+ table_metadata_df = self._execute_sql(
120
+ f"""
121
+ SELECT * FROM {catalog_name}.information_schema.tables
122
+ WHERE table_catalog == '{catalog_name}'
123
+ AND table_schema == '{schema_name}'
124
+ AND table_name == '{table_name}'
125
+ AND table_type <> 'VIEW'
126
+ """,
127
+ )
128
+ if not table_metadata_df.head(1):
129
+ table = None
130
+ else:
131
+ table_metadata = table_metadata_df.collect()[0]
132
+ table_tags_list = self._execute_sql(
133
+ f"""
134
+ SELECT tag_name, tag_value FROM {catalog_name}.information_schema.table_tags
135
+ WHERE catalog_name == '{catalog_name}'
136
+ AND schema_name == '{schema_name}'
137
+ AND table_name == '{table_name}'
138
+ """,
139
+ ).collect()
140
+ table_tags = {r["tag_name"]: r["tag_value"] for r in table_tags_list}
141
+ table = Table(
142
+ identifier=table_identifier,
143
+ data_source_format=table_metadata["data_source_format"],
144
+ business_properties=table_tags,
145
+ storage_path=table_metadata["storage_path"],
146
+ columns=[],
147
+ is_external=table_metadata["table_type"] != "MANAGED",
148
+ )
149
+ table = self.add_columns_to_table(table)
150
+ return table
151
+
152
+ def add_tables_to_schema(self, catalog: str | Catalog, schema: str | Schema) -> Schema:
153
+ """Add tables to a schema within a specified catalog.
154
+
155
+ This method retrieves all tables within the specified schema and catalog,
156
+ and adds them to the `Schema` object. The schema is updated with `Table`
157
+ objects containing details about each table.
158
+
159
+ Args:
160
+ catalog: The catalog containing the schema. This can be
161
+ either a string representing the catalog name or a `Catalog` object.
162
+ schema: The schema to which tables will be added. This
163
+ can be either a string representing the schema name or a `Schema`
164
+ object.
165
+
166
+ Returns:
167
+ The updated `Schema` object with tables added.
168
+ """
169
+ if isinstance(catalog, Catalog):
170
+ catalog_name = catalog.name
171
+ else:
172
+ catalog_name = catalog
173
+ if isinstance(schema, str):
174
+ schema_obj = self.get_schema_by_name(catalog_name, schema)
175
+ if schema_obj is None:
176
+ raise ValueError(f"Schema {schema} not found in catalog {catalog_name}.")
177
+ else:
178
+ schema_obj = schema
179
+ tables_df = self._execute_sql(
180
+ f"SELECT * FROM {catalog_name}.information_schema.tables WHERE table_catalog == '{catalog_name}' AND table_schema == '{schema_obj.name}' AND table_type <> 'VIEW'",
181
+ ).collect()
182
+ for table_row in tables_df:
183
+ table_name = table_row["table_name"]
184
+ table_tags_list = self._execute_sql(
185
+ f"""SELECT tag_name, tag_value FROM {catalog_name}.information_schema.table_tags
186
+ WHERE
187
+ catalog_name == '{catalog_name}'
188
+ AND schema_name == '{schema_obj.name}'
189
+ AND table_name == '{table_name}'
190
+ """,
191
+ ).collect()
192
+ table_tags = {r["tag_name"]: r["tag_value"] for r in table_tags_list}
193
+
194
+ table = Table(
195
+ data_source_format=table_row["data_source_format"],
196
+ identifier=f"{catalog}.{schema_obj.name}.{table_name}",
197
+ business_properties=table_tags,
198
+ columns=[],
199
+ )
200
+ table = self.add_columns_to_table(table)
201
+ schema_obj.add_table(table)
202
+ return schema_obj
203
+
204
+ def add_columns_to_table(self, table: Table) -> Table:
205
+ """Add columns to a table by retrieving column metadata from the information schema.
206
+
207
+ This method retrieves column details for the specified `table` from the
208
+ information schema and adds `Column` objects to the `Table`. It also identifies
209
+ primary key columns for the table.
210
+
211
+ Args:
212
+ table: The `Table` object to which columns will be added. The
213
+ `Table` object must have its `identifier` attribute set.
214
+
215
+ Returns:
216
+ The updated `Table` object with columns added.
217
+ """
218
+ if not table.identifier:
219
+ raise ValueError("Please set the Identifier of the Table to use this method.")
220
+ cols_df = self._execute_sql(
221
+ f"""
222
+ SELECT * FROM {table.catalog}.information_schema.columns
223
+ WHERE table_name == '{table.name}'
224
+ AND table_schema == '{table.schema}'
225
+ ORDER BY ordinal_position
226
+ """,
227
+ ).collect()
228
+ partition_cols_indexed = {}
229
+ for col_row in cols_df:
230
+ generated = "GENERATED ALWAYS AS IDENTITY" if col_row["is_identity"] == "YES" else None
231
+ table.add_column(
232
+ Column(
233
+ name=col_row["column_name"],
234
+ data_type=col_row["data_type"],
235
+ default_value=col_row["column_default"],
236
+ generated=generated,
237
+ nullable=col_row["is_nullable"] == "YES",
238
+ ),
239
+ )
240
+ if col_row["partition_index"] is not None:
241
+ partition_cols_indexed.update({str(col_row["partition_index"]): col_row["column_name"]})
242
+ partitioned_by = [val for _, val in sorted(partition_cols_indexed.items())]
243
+ if partitioned_by:
244
+ table.liquid_clustering = False
245
+ table.partition_by = partitioned_by
246
+ table = self._identify_pk_columns(table)
247
+ table = self._identify_fk_constraints(table)
248
+ return table
249
+
250
+ def _identify_pk_columns(self, table: Table) -> Table:
251
+ result = self._execute_sql(
252
+ f"""
253
+ SELECT A.column_name
254
+ FROM {table.catalog}.information_schema.key_column_usage AS A
255
+ JOIN {table.catalog}.information_schema.table_constraints AS B
256
+ USING (constraint_catalog, constraint_schema, constraint_name)
257
+ WHERE
258
+ A.table_catalog = '{table.catalog}'
259
+ AND A.table_schema = '{table.schema}'
260
+ AND A.table_name = '{table.name}'
261
+ AND B.constraint_type = 'PRIMARY KEY'
262
+ """,
263
+ ).collect()
264
+ table.composite_primary_key = [col_row["column_name"] for col_row in result]
265
+ return table
266
+
267
+ def _identify_fk_constraints(self, table: Table) -> Table:
268
+ result = self._execute_sql(
269
+ f"""
270
+ SELECT
271
+ concat_ws(".", C.table_catalog, C.table_schema, C.table_name) as source_table_identifier,
272
+ C.column_name as source_column,
273
+ concat_ws(".", B.table_catalog, B.table_schema, B.table_name) as parent_table_identifier,
274
+ B.column_name as parent_column
275
+ -- fk_option currently not supported
276
+ -- ,concat_ws(" ",D.match_option, "ON UPDATE", D.update_rule, "ON DELETE", D.delete_rule) AS fk_options
277
+ FROM {table.catalog}.information_schema.table_constraints AS A
278
+ LEFT JOIN {table.catalog}.information_schema.constraint_column_usage AS B USING(constraint_name)
279
+ LEFT JOIN {table.catalog}.information_schema.key_column_usage AS C USING(constraint_name)
280
+ -- LEFT JOIN {table.catalog}.information_schema.referential_constraints AS D USING(constraint_name)
281
+ WHERE
282
+ A.table_catalog == '{table.catalog}'
283
+ AND A.table_schema = '{table.schema}'
284
+ AND A.table_name == '{table.name}'
285
+ AND A.constraint_type == "FOREIGN KEY"
286
+ """,
287
+ ).collect()
288
+ table.foreign_keys = [
289
+ ForeignKey(
290
+ foreign_key_columns=fk_row["source_column"],
291
+ parent_table=fk_row["parent_table_identifier"],
292
+ parent_columns=fk_row["parent_column"],
293
+ )
294
+ for fk_row in result
295
+ ]
296
+ return table
@@ -0,0 +1,10 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
+ @dataclass
5
+ class Catalog:
6
+ """A class representing a Unity Catalog - Catalog."""
7
+
8
+ name: str
9
+ owner: str = ""
10
+ comment: str = ""
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -17,6 +17,7 @@ class Schema(ReadInstancesMixin):
17
17
  storage_path: str | None = None
18
18
  tables: list[Table] = Field(default_factory=list)
19
19
  properties: dict[str, Any] = Field(default_factory=dict)
20
+ comment: str | None = None
20
21
 
21
22
  @classmethod
22
23
  def read_instance_from_file(
@@ -48,7 +49,7 @@ class Schema(ReadInstancesMixin):
48
49
  instance_path=processed_instance_path.parents[0] / table_dir_name,
49
50
  catalog_name=schema.catalog,
50
51
  schema_name=schema.name,
51
- schema_storage_path=Path(schema.storage_path),
52
+ schema_storage_path=schema.storage_path,
52
53
  fail_on_missing_subfolder=fail_on_missing_subfolder,
53
54
  )
54
55
  schema.tables = tables
@@ -74,3 +75,21 @@ class Schema(ReadInstancesMixin):
74
75
  raise ValueError(f"Table {table_name} not found in {self.catalog}.{self.name} metadata.")
75
76
 
76
77
  return table
78
+
79
+ def add_table(self, table: Table):
80
+ """Adds a table to the schema and sets the table identifier accordingly.
81
+
82
+ Args:
83
+ table: A Table object that is added to the Schema tables.
84
+ """
85
+ table.identifier = f"{self.catalog}.{self.name}.{table.name}"
86
+ self.tables.append(table)
87
+
88
+ def add_tables(self, tables: list[Table]) -> None:
89
+ """Adds tables to the schema.
90
+
91
+ Args:
92
+ tables: A list of Table objects that are added to the Schema tables.
93
+ """
94
+ for table in tables:
95
+ self.add_table(table)
@@ -4,7 +4,13 @@ from typing import Any, Self
4
4
  import yaml
5
5
  import yaml.scanner
6
6
  from jinja2 import TemplateNotFound
7
- from pydantic import Field, ValidationError, ValidationInfo, field_validator, model_validator
7
+ from pydantic import (
8
+ Field,
9
+ ValidationError,
10
+ ValidationInfo,
11
+ field_validator,
12
+ model_validator,
13
+ )
8
14
 
9
15
  from ..logging import LoggerMixin
10
16
  from ..utils.file_and_directory_handler import process_path
@@ -24,11 +30,14 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
24
30
  is_external: bool | None = None
25
31
  partition_by: list[str] = Field(default_factory=list)
26
32
  liquid_clustering: bool | None = None
33
+ composite_primary_key: list[str] = Field(default_factory=list)
27
34
  properties: dict[str, str] = Field(default_factory=dict)
28
35
  constraints: list[Constraint] = Field(default_factory=list)
29
36
  foreign_keys: list[ForeignKey] = Field(default_factory=list)
30
- storage_path: Path | None = None
37
+ storage_path: str | None = None
38
+ business_properties: dict[str, str] = Field(default_factory=dict)
31
39
  comment: str | None = None
40
+ data_source_format: str | None = None
32
41
 
33
42
  def model_post_init(self, __context: Any) -> None:
34
43
  """Post init method for the Table model."""
@@ -87,11 +96,10 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
87
96
 
88
97
  @model_validator(mode="after")
89
98
  def _validate_is_external(cls, table: Self):
90
- """If is_external is set to False, storage_path has to be None."""
91
- if not table.is_external and table.storage_path is not None:
92
- raise ValueError("is_external cannot be false while storage_path is set.")
93
- elif table.is_external and table.storage_path is None:
99
+ """If is_external is set to True, storage_path has to be set."""
100
+ if table.is_external and table.storage_path is None:
94
101
  raise ValueError("is_external cannot be true while storage_path is None.")
102
+ return table
95
103
 
96
104
  @classmethod
97
105
  def read_instances_from_directory(
@@ -153,7 +161,7 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
153
161
  sub_errors: list[ValidationErrorType] = []
154
162
  if instance_file.is_file() and instance_file.suffix in (".yaml", ".yml"):
155
163
  instance, sub_errors = cls.read_instance_from_file(
156
- instance_file, catalog_name, schema_name, schema_storage_path
164
+ instance_file, catalog_name, schema_name, str(schema_storage_path)
157
165
  )
158
166
  instances += [] if instance is None else [instance]
159
167
  errors += sub_errors
@@ -205,9 +213,9 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
205
213
  data["identifier"] = f"{catalog_name}.{schema_name}.{data['name']}"
206
214
  if data.get("is_external"):
207
215
  if storage_path := data.get("storage_path"):
208
- data["storage_path"] = Path(storage_path)
216
+ data["storage_path"] = storage_path
209
217
  elif schema_storage_path:
210
- data["storage_path"] = schema_storage_path / data["name"]
218
+ data["storage_path"] = (schema_storage_path / data["name"]).as_posix()
211
219
  else:
212
220
  raise ValueError(
213
221
  f"Neither storage path nor schema storage path of table {data['name']} has been provided."
@@ -215,14 +223,18 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
215
223
 
216
224
  instance, sub_errors = cls.metadata_to_instance(data)
217
225
  errors += sub_errors
218
- except (ValidationError, yaml.parser.ParserError, yaml.scanner.ScannerError) as e:
226
+ except (
227
+ ValidationError,
228
+ yaml.parser.ParserError,
229
+ yaml.scanner.ScannerError,
230
+ ) as e:
219
231
  instance = None
220
232
  errors.append(e)
221
233
  return instance, errors
222
234
 
223
235
  def get_create_statement(
224
236
  self,
225
- templates: Path = Path("./templates"),
237
+ templates: Path = Path("./src/cloe_nessy/models/templates/"),
226
238
  template_name: str = "create_table.sql.j2",
227
239
  replace: bool = True,
228
240
  ):
@@ -234,3 +246,47 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
234
246
  raise err
235
247
  render = template.render(table=self, replace=replace)
236
248
  return render
249
+
250
+ def get_column_by_name(self, column_name: str) -> Column | None:
251
+ """Get a column by name.
252
+
253
+ Args:
254
+ column_name: The name of the column to get.
255
+
256
+ Returns:
257
+ The column if found, else None.
258
+ """
259
+ for column in self.columns:
260
+ if column.name == column_name:
261
+ return column
262
+ return None
263
+
264
+ def update_column(self, column: Column) -> None:
265
+ """Replaces a Column with a new Column object to update it.
266
+
267
+ Args:
268
+ column: The new column object, to replace the old one.
269
+ """
270
+ self.remove_column(column)
271
+ self.add_column(column)
272
+
273
+ def add_column(self, column: Column):
274
+ """Adds a column to the table.
275
+
276
+ Args:
277
+ column: The column to be added.
278
+ """
279
+ self.columns.append(column)
280
+
281
+ def remove_column(self, column: str | Column) -> None:
282
+ """Remove a column from the Table.
283
+
284
+ Args:
285
+ column: The column to be removed.
286
+ """
287
+ if isinstance(column, Column):
288
+ column_name = column.name
289
+ else:
290
+ column_name = column
291
+
292
+ self.columns = [col for col in self.columns if col.name != column_name]
File without changes
@@ -0,0 +1,67 @@
1
+ from pathlib import Path
2
+ from typing import Any
3
+
4
+ from jinja2 import TemplateNotFound
5
+ from pydantic import BaseModel, field_validator
6
+
7
+ from ..logging import LoggerMixin
8
+ from .mixins.template_loader_mixin import TemplateLoaderMixin
9
+
10
+
11
+ class Volume(TemplateLoaderMixin, LoggerMixin, BaseModel):
12
+ """Volume class for managing volumes."""
13
+
14
+ identifier: str
15
+ storage_path: str | Path
16
+ comment: str | None = None
17
+
18
+ @field_validator("identifier")
19
+ def check_identifier(cls, value):
20
+ """Check the identifier."""
21
+ if value.count(".") != 2:
22
+ raise ValueError("The identifier must be in the format 'catalog.schema.volume_name'.")
23
+ return value
24
+
25
+ @property
26
+ def storage_identifier(self) -> str:
27
+ """Return the storage identifier."""
28
+ return f"/Volumes/{self.catalog}/{self.schema}/{self.name}/"
29
+
30
+ @property
31
+ def catalog(self) -> str:
32
+ """Return the catalog name."""
33
+ return self.identifier.split(".")[0]
34
+
35
+ @property
36
+ def schema_name(self) -> str:
37
+ """Return the schema name."""
38
+ return self.identifier.split(".")[1]
39
+
40
+ @property
41
+ def name(self) -> str:
42
+ """Return the volume name."""
43
+ return self.identifier.split(".")[2]
44
+
45
+ @property
46
+ def escaped_identifier(self) -> str:
47
+ """Return the escaped identifier."""
48
+ return f"`{self.catalog}`.`{self.schema_name}`.`{self.name}`"
49
+
50
+ def model_post_init(self, __context: Any) -> None:
51
+ """Post init method for the Volume model."""
52
+ self._console_logger = self.get_console_logger()
53
+ self._console_logger.debug(f"Model for volume [ '{self.identifier}' ] has been initialized.")
54
+
55
+ def get_create_statement(
56
+ self,
57
+ templates: Path = Path("./src/cloe_nessy/models/templates/"),
58
+ template_name: str = "create_volume.sql.j2",
59
+ ):
60
+ """Get the create statement for the Volume."""
61
+ try:
62
+ template = self.get_template(templates, template_name)
63
+ except TemplateNotFound as err:
64
+ self._console_logger.error(f"Template [ {template_name} ] not found.")
65
+ raise err
66
+ render = template.render(volume=self)
67
+ return render
@@ -1,3 +1,8 @@
1
- from .table_manager import TableManager
1
+ from .table_manager import TableManager, table_log_decorator
2
+ from .volume_manager import VolumeManager
2
3
 
3
- __all__ = ["TableManager"]
4
+ __all__ = [
5
+ "TableManager",
6
+ "table_log_decorator",
7
+ "VolumeManager",
8
+ ]