cloe-nessy 0.3.3__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. cloe_nessy/__init__.py +0 -0
  2. cloe_nessy/clients/__init__.py +0 -0
  3. cloe_nessy/clients/api_client/__init__.py +0 -0
  4. cloe_nessy/clients/api_client/api_client.py +0 -0
  5. cloe_nessy/clients/api_client/api_response.py +0 -0
  6. cloe_nessy/clients/api_client/auth.py +0 -0
  7. cloe_nessy/clients/api_client/exceptions.py +0 -0
  8. cloe_nessy/file_utilities/__init__.py +0 -0
  9. cloe_nessy/file_utilities/exceptions.py +0 -0
  10. cloe_nessy/file_utilities/factory.py +0 -0
  11. cloe_nessy/file_utilities/get_file_paths.py +0 -0
  12. cloe_nessy/file_utilities/location_types.py +0 -0
  13. cloe_nessy/file_utilities/strategies/__init__.py +0 -0
  14. cloe_nessy/file_utilities/strategies/base_strategy.py +0 -0
  15. cloe_nessy/file_utilities/strategies/local_strategy.py +0 -0
  16. cloe_nessy/file_utilities/strategies/onelake_strategy.py +0 -0
  17. cloe_nessy/file_utilities/strategies/utils_strategy.py +0 -0
  18. cloe_nessy/integration/__init__.py +0 -0
  19. cloe_nessy/integration/reader/__init__.py +0 -0
  20. cloe_nessy/integration/reader/api_reader.py +0 -0
  21. cloe_nessy/integration/reader/catalog_reader.py +0 -0
  22. cloe_nessy/integration/reader/excel_reader.py +0 -0
  23. cloe_nessy/integration/reader/exceptions.py +0 -0
  24. cloe_nessy/integration/reader/file_reader.py +7 -1
  25. cloe_nessy/integration/reader/reader.py +0 -0
  26. cloe_nessy/integration/writer/__init__.py +0 -0
  27. cloe_nessy/integration/writer/catalog_writer.py +1 -1
  28. cloe_nessy/logging/__init__.py +0 -0
  29. cloe_nessy/logging/logger_mixin.py +0 -0
  30. cloe_nessy/models/__init__.py +4 -0
  31. cloe_nessy/models/adapter/__init__.py +3 -0
  32. cloe_nessy/models/adapter/unity_catalog_adapter.py +292 -0
  33. cloe_nessy/models/catalog.py +10 -0
  34. cloe_nessy/models/column.py +0 -0
  35. cloe_nessy/models/constraint.py +0 -0
  36. cloe_nessy/models/foreign_key.py +0 -0
  37. cloe_nessy/models/mixins/__init__.py +0 -0
  38. cloe_nessy/models/mixins/read_instance_mixin.py +0 -0
  39. cloe_nessy/models/mixins/template_loader_mixin.py +0 -0
  40. cloe_nessy/models/schema.py +19 -0
  41. cloe_nessy/models/table.py +50 -5
  42. cloe_nessy/models/types.py +0 -0
  43. cloe_nessy/models/volume.py +67 -0
  44. cloe_nessy/object_manager/__init__.py +7 -2
  45. cloe_nessy/object_manager/table_manager.py +183 -7
  46. cloe_nessy/object_manager/volume_manager.py +70 -0
  47. cloe_nessy/pipeline/__init__.py +0 -0
  48. cloe_nessy/pipeline/actions/__init__.py +2 -0
  49. cloe_nessy/pipeline/actions/read_api.py +69 -45
  50. cloe_nessy/pipeline/actions/read_catalog_table.py +9 -9
  51. cloe_nessy/pipeline/actions/read_excel.py +14 -10
  52. cloe_nessy/pipeline/actions/read_files.py +54 -28
  53. cloe_nessy/pipeline/actions/read_metadata_yaml.py +9 -9
  54. cloe_nessy/pipeline/actions/transform_change_datatype.py +13 -8
  55. cloe_nessy/pipeline/actions/transform_clean_column_names.py +4 -0
  56. cloe_nessy/pipeline/actions/transform_concat_columns.py +25 -11
  57. cloe_nessy/pipeline/actions/transform_decode.py +18 -7
  58. cloe_nessy/pipeline/actions/transform_deduplication.py +9 -9
  59. cloe_nessy/pipeline/actions/transform_distinct.py +8 -8
  60. cloe_nessy/pipeline/actions/transform_filter.py +6 -6
  61. cloe_nessy/pipeline/actions/transform_generic_sql.py +12 -6
  62. cloe_nessy/pipeline/actions/transform_group_aggregate.py +20 -26
  63. cloe_nessy/pipeline/actions/transform_hash_columns.py +209 -0
  64. cloe_nessy/pipeline/actions/transform_join.py +17 -10
  65. cloe_nessy/pipeline/actions/transform_json_normalize.py +19 -6
  66. cloe_nessy/pipeline/actions/transform_rename_columns.py +7 -7
  67. cloe_nessy/pipeline/actions/transform_replace_values.py +8 -8
  68. cloe_nessy/pipeline/actions/transform_select_columns.py +38 -9
  69. cloe_nessy/pipeline/actions/transform_union.py +12 -8
  70. cloe_nessy/pipeline/actions/write_catalog_table.py +11 -10
  71. cloe_nessy/pipeline/pipeline.py +44 -2
  72. cloe_nessy/pipeline/pipeline_action.py +0 -0
  73. cloe_nessy/pipeline/pipeline_config.py +0 -0
  74. cloe_nessy/pipeline/pipeline_context.py +0 -0
  75. cloe_nessy/pipeline/pipeline_parsing_service.py +0 -0
  76. cloe_nessy/pipeline/pipeline_step.py +0 -0
  77. cloe_nessy/py.typed +0 -0
  78. cloe_nessy/session/__init__.py +0 -0
  79. cloe_nessy/session/session_manager.py +27 -0
  80. cloe_nessy/settings/__init__.py +0 -0
  81. cloe_nessy/settings/settings.py +0 -0
  82. cloe_nessy/utils/__init__.py +0 -0
  83. cloe_nessy/utils/file_and_directory_handler.py +0 -0
  84. cloe_nessy-0.3.8.dist-info/METADATA +46 -0
  85. {cloe_nessy-0.3.3.dist-info → cloe_nessy-0.3.8.dist-info}/RECORD +41 -35
  86. {cloe_nessy-0.3.3.dist-info → cloe_nessy-0.3.8.dist-info}/WHEEL +1 -1
  87. {cloe_nessy-0.3.3.dist-info → cloe_nessy-0.3.8.dist-info}/top_level.txt +0 -0
  88. cloe_nessy-0.3.3.dist-info/METADATA +0 -26
cloe_nessy/__init__.py CHANGED
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -46,7 +46,13 @@ class FileReader(BaseReader):
46
46
  if not spark_format and not extension:
47
47
  raise ValueError("Either spark_format or extension must be provided.")
48
48
  self._console_logger.debug(f"Reading files from [ '{location}' ] ...")
49
- extension_to_datatype_dict = {"csv": "csv", "json": "json", "parquet": "parquet", "txt": "text", "xml": "xml"}
49
+ extension_to_datatype_dict = {
50
+ "csv": "csv",
51
+ "json": "json",
52
+ "parquet": "parquet",
53
+ "txt": "text",
54
+ "xml": "xml",
55
+ }
50
56
 
51
57
  if extension and not spark_format:
52
58
  if extension not in extension_to_datatype_dict:
File without changes
File without changes
@@ -20,7 +20,7 @@ class CatalogWriter:
20
20
  format 'catalog.schema.table'.
21
21
  mode: The write mode. One of append, overwrite, error, errorifexists, ignore.
22
22
  partition_by: Names of the partitioning columns.
23
- options: All other string options.
23
+ options: PySpark options for the DataFrame.saveAsTable operation (e.g. mergeSchema:true).
24
24
 
25
25
  Notes:
26
26
  append: Append contents of this DataFrame to existing data.
File without changes
File without changes
@@ -1,13 +1,17 @@
1
+ from .catalog import Catalog
1
2
  from .column import Column
2
3
  from .constraint import Constraint
3
4
  from .foreign_key import ForeignKey
4
5
  from .schema import Schema
5
6
  from .table import Table
7
+ from .volume import Volume
6
8
 
7
9
  __all__ = [
10
+ "Catalog",
8
11
  "Column",
9
12
  "Constraint",
10
13
  "Table",
11
14
  "Schema",
12
15
  "ForeignKey",
16
+ "Volume",
13
17
  ]
@@ -0,0 +1,3 @@
1
+ from .unity_catalog_adapter import UnityCatalogAdapter
2
+
3
+ __all__ = ["UnityCatalogAdapter"]
@@ -0,0 +1,292 @@
1
+ from pyspark.sql import SparkSession
2
+ from pyspark.sql import functions as F
3
+
4
+ from cloe_nessy.logging.logger_mixin import LoggerMixin
5
+ from cloe_nessy.models import ForeignKey
6
+
7
+ from ...session import SessionManager
8
+ from ..catalog import Catalog
9
+ from ..column import Column
10
+ from ..schema import Schema
11
+ from ..table import Table
12
+
13
+
14
class UnityCatalogAdapter(LoggerMixin):
    """Acts as a translator between Unity Catalog metadata and Nessy Models.

    Metadata is read through Spark SQL (``SHOW CATALOGS``,
    ``DESCRIBE CATALOG EXTENDED`` and the per-catalog ``information_schema``
    views) and converted into ``Catalog``, ``Schema``, ``Table``, ``Column``
    and ``ForeignKey`` objects.
    """

    def __init__(self, spark: SparkSession | None = None):
        """Initializes the UnityCatalogAdapter class.

        Args:
            spark: The Spark session used to run the metadata queries. When
                omitted, the session returned by
                `SessionManager.get_spark_session()` is used.
        """
        self._spark = spark or SessionManager.get_spark_session()
        self._console_logger = self.get_console_logger()
        # The catalogs are read once at construction time; `get_catalog_by_name`
        # only searches this snapshot.
        self._catalogs = self.get_catalogs()

    def _execute_sql(self, query: str):
        """Execute a SQL query and return a DataFrame.

        This wrapper is used for better testability.

        Args:
            query: The SQL statement to execute.

        Returns:
            The resulting DataFrame after executing the SQL query.
        """
        return self._spark.sql(query)

    def _get_table_tags(self, catalog_name: str, schema_name: str, table_name: str) -> dict[str, str]:
        """Read the tags of a table from `information_schema.table_tags`.

        Args:
            catalog_name: The name of the catalog containing the table.
            schema_name: The name of the schema containing the table.
            table_name: The name of the table.

        Returns:
            A mapping of tag name to tag value.
        """
        tag_rows = self._execute_sql(
            f"""
            SELECT tag_name, tag_value FROM {catalog_name}.information_schema.table_tags
            WHERE catalog_name == '{catalog_name}'
            AND schema_name == '{schema_name}'
            AND table_name == '{table_name}'
            """,
        ).collect()
        return {row["tag_name"]: row["tag_value"] for row in tag_rows}

    def get_catalogs(self) -> list[Catalog]:
        """Retrieve a list of catalogs with their associated metadata.

        Returns:
            A list of `Catalog` objects.
        """
        catalogs = []
        for catalog_row in self._execute_sql("SHOW CATALOGS").collect():
            name = catalog_row["catalog"]
            catalog_metadata = self._execute_sql(f"DESCRIBE CATALOG EXTENDED {name}")
            # DESCRIBE returns one (info_name, info_value) row per property;
            # pivoting on a constant dummy key turns them into a single row.
            pivoted_df = (
                catalog_metadata.withColumn("dummy", F.lit("dummy"))
                .groupBy("dummy")
                .pivot("info_name")
                .agg(F.first("info_value"))
            )
            # Collect once and read both values from the same row.
            metadata_row = pivoted_df.collect()[0]
            catalogs.append(
                Catalog(name=name, owner=metadata_row["Owner"], comment=metadata_row["Comment"]),
            )
        return catalogs

    def get_catalog_by_name(self, name: str) -> Catalog | None:
        """Returns a Catalog by its name.

        Args:
            name: The name of the Catalog.

        Returns:
            The Catalog with the specified name, or `None` (after logging a
            warning) if no such catalog was found at construction time.
        """
        for catalog in self._catalogs:
            if catalog.name == name:
                return catalog
        self._console_logger.warning(f"No catalog found with name: {name}")
        return None

    def get_catalog_schemas(self, catalog: str | Catalog) -> list[Schema]:
        """Collects all schemas in a given catalog.

        The returned schemas do not have their tables populated.

        Args:
            catalog: The catalog from which the schemas are to be collected.
                Either the catalog name or a `Catalog` object.

        Returns:
            A list of `Schema` objects.
        """
        if isinstance(catalog, Catalog):
            catalog = catalog.name
        schema_rows = self._execute_sql(f"SELECT * FROM {catalog}.information_schema.schemata").collect()
        return [
            Schema(name=schema_row["schema_name"], catalog=catalog, comment=schema_row["comment"])
            for schema_row in schema_rows
        ]

    def get_schema_by_name(self, catalog: str | Catalog, name: str) -> Schema | None:
        """Retrieve a schema by its name from a specified catalog.

        Args:
            catalog: The catalog from which to retrieve the schema.
                This can be either a string representing the catalog name or a
                `Catalog` object.
            name: The name of the schema to retrieve.

        Returns:
            The `Schema` object with its tables added if found, otherwise
            `None` (after logging a warning).
        """
        if isinstance(catalog, Catalog):
            catalog = catalog.name
        for schema in self.get_catalog_schemas(catalog):
            if schema.name == name:
                return self.add_tables_to_schema(catalog, schema)

        self._console_logger.warning(f"No Schema in Catalog [ '{catalog}' ] found with name [ '{name}' ]")
        return None

    def get_table_by_name(self, table_identifier: str) -> Table | None:
        """Retrieve a table by its name.

        Views are excluded from the lookup.

        Args:
            table_identifier: The table identifier in the format
                'catalog.schema.table'.

        Returns:
            The `Table` with its tags, columns, primary key and foreign keys
            populated, or `None` if no matching table exists.

        Raises:
            ValueError: If the identifier does not consist of three parts.
        """
        identifier_parts = table_identifier.split(".")
        if len(identifier_parts) != 3:
            raise ValueError("The identifier must be in the format 'catalog.schema.table'")

        catalog_name, schema_name, table_name = identifier_parts
        table_metadata_rows = self._execute_sql(
            f"""
            SELECT * FROM {catalog_name}.information_schema.tables
            WHERE table_catalog == '{catalog_name}'
            AND table_schema == '{schema_name}'
            AND table_name == '{table_name}'
            AND table_type <> 'VIEW'
            """,
        ).head(1)
        if not table_metadata_rows:
            return None

        # Reuse the row fetched by `head(1)` instead of collecting again.
        table_metadata = table_metadata_rows[0]
        table = Table(
            identifier=table_identifier,
            data_source_format=table_metadata["data_source_format"],
            business_properties=self._get_table_tags(catalog_name, schema_name, table_name),
            storage_path=table_metadata["storage_path"],
            columns=[],
            is_external=table_metadata["table_type"] != "MANAGED",
        )
        return self.add_columns_to_table(table)

    def add_tables_to_schema(self, catalog: str | Catalog, schema: str | Schema) -> Schema:
        """Add tables to a schema within a specified catalog.

        This method retrieves all tables (views excluded) within the specified
        schema and catalog, and adds them to the `Schema` object. The schema is
        updated with `Table` objects containing details about each table.

        Args:
            catalog: The catalog containing the schema. This can be
                either a string representing the catalog name or a `Catalog` object.
            schema: The schema to which tables will be added. This
                can be either a string representing the schema name or a `Schema`
                object.

        Returns:
            The updated `Schema` object with tables added.

        Raises:
            ValueError: If `schema` is given by name and cannot be found.
        """
        catalog_name = catalog.name if isinstance(catalog, Catalog) else catalog
        if isinstance(schema, str):
            schema_obj = self.get_schema_by_name(catalog_name, schema)
            if schema_obj is None:
                raise ValueError(f"Schema {schema} not found in catalog {catalog_name}.")
            # `get_schema_by_name` already returns the schema with its tables
            # populated; adding them again here would duplicate every table.
            return schema_obj

        schema_obj = schema
        table_rows = self._execute_sql(
            f"SELECT * FROM {catalog_name}.information_schema.tables "
            f"WHERE table_catalog == '{catalog_name}' "
            f"AND table_schema == '{schema_obj.name}' "
            "AND table_type <> 'VIEW'",
        ).collect()
        for table_row in table_rows:
            table_name = table_row["table_name"]
            table = Table(
                data_source_format=table_row["data_source_format"],
                # Use the resolved name: formatting `catalog` directly would
                # embed the object's repr when a `Catalog` instance is passed.
                identifier=f"{catalog_name}.{schema_obj.name}.{table_name}",
                business_properties=self._get_table_tags(catalog_name, schema_obj.name, table_name),
                columns=[],
            )
            table = self.add_columns_to_table(table)
            schema_obj.add_table(table)
        return schema_obj

    def add_columns_to_table(self, table: Table) -> Table:
        """Add columns to a table by retrieving column metadata from the information schema.

        This method retrieves column details for the specified `table` from the
        information schema and adds `Column` objects to the `Table`. It also
        sets the partition columns and identifies the primary key columns and
        foreign key constraints of the table.

        Args:
            table: The `Table` object to which columns will be added. The
                `Table` object must have its `identifier` attribute set.

        Returns:
            The updated `Table` object with columns added.

        Raises:
            ValueError: If the table has no identifier.
        """
        if not table.identifier:
            raise ValueError("Please set the Identifier of the Table to use this method.")
        column_rows = self._execute_sql(
            f"""
            SELECT * FROM {table.catalog}.information_schema.columns
            WHERE table_name == '{table.name}'
            AND table_schema == '{table.schema}'
            ORDER BY ordinal_position
            """,
        ).collect()
        partition_cols_indexed: dict[int, str] = {}
        for col_row in column_rows:
            generated = "GENERATED ALWAYS AS IDENTITY" if col_row["is_identity"] == "YES" else None
            table.add_column(
                Column(
                    name=col_row["column_name"],
                    data_type=col_row["data_type"],
                    default_value=col_row["column_default"],
                    generated=generated,
                    nullable=col_row["is_nullable"] == "YES",
                ),
            )
            if col_row["partition_index"] is not None:
                # Key by the numeric index: string keys would sort "10" before "2".
                partition_cols_indexed[int(col_row["partition_index"])] = col_row["column_name"]
        partitioned_by = [partition_cols_indexed[index] for index in sorted(partition_cols_indexed)]
        if partitioned_by:
            table.liquid_clustering = False
            table.partition_by = partitioned_by
        table = self._identify_pk_columns(table)
        table = self._identify_fk_constraints(table)
        return table

    def _identify_pk_columns(self, table: Table) -> Table:
        """Set `composite_primary_key` on the table from its PRIMARY KEY constraint.

        Args:
            table: The table whose primary key columns are looked up.

        Returns:
            The same table with `composite_primary_key` set to the list of
            primary key column names (empty if there is no primary key).
        """
        result = self._execute_sql(
            f"""
            SELECT A.column_name
            FROM {table.catalog}.information_schema.key_column_usage AS A
            JOIN {table.catalog}.information_schema.table_constraints AS B
            USING (constraint_catalog, constraint_schema, constraint_name)
            WHERE
            A.table_catalog = '{table.catalog}'
            AND A.table_schema = '{table.schema}'
            AND A.table_name = '{table.name}'
            AND B.constraint_type = 'PRIMARY KEY'
            """,
        ).collect()
        table.composite_primary_key = [col_row["column_name"] for col_row in result]
        return table

    def _identify_fk_constraints(self, table: Table) -> Table:
        """Set `foreign_keys` on the table from its FOREIGN KEY constraints.

        One `ForeignKey` is created per row returned by the query, i.e. per
        (source column, parent column) combination.

        Args:
            table: The table whose foreign key constraints are looked up.

        Returns:
            The same table with `foreign_keys` replaced by the constraints found.
        """
        # NOTE(review): the joins match on constraint_name only; presumably
        # constraint names are unique within the catalog - confirm.
        result = self._execute_sql(
            f"""
            SELECT
            concat_ws(".", C.table_catalog, C.table_schema, C.table_name) as source_table_identifier,
            C.column_name as source_column,
            concat_ws(".", B.table_catalog, B.table_schema, B.table_name) as parent_table_identifier,
            B.column_name as parent_column
            -- fk_option currently not supported
            -- ,concat_ws(" ",D.match_option, "ON UPDATE", D.update_rule, "ON DELETE", D.delete_rule) AS fk_options
            FROM {table.catalog}.information_schema.table_constraints AS A
            LEFT JOIN {table.catalog}.information_schema.constraint_column_usage AS B USING(constraint_name)
            LEFT JOIN {table.catalog}.information_schema.key_column_usage AS C USING(constraint_name)
            -- LEFT JOIN {table.catalog}.information_schema.referential_constraints AS D USING(constraint_name)
            WHERE
            A.table_catalog == '{table.catalog}'
            AND A.table_schema = '{table.schema}'
            AND A.table_name == '{table.name}'
            AND A.constraint_type == "FOREIGN KEY"
            """,
        ).collect()
        table.foreign_keys = [
            ForeignKey(
                foreign_key_columns=fk_row["source_column"],
                parent_table=fk_row["parent_table_identifier"],
                parent_columns=fk_row["parent_column"],
            )
            for fk_row in result
        ]
        return table
@@ -0,0 +1,10 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
@dataclass
class Catalog:
    """Plain data holder describing a single Unity Catalog catalog.

    Attributes:
        name: The catalog name (required).
        owner: The owner of the catalog; an empty string when not given.
        comment: The catalog comment; an empty string when not given.
    """

    name: str
    owner: str = ""
    comment: str = ""
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -17,6 +17,7 @@ class Schema(ReadInstancesMixin):
17
17
  storage_path: str | None = None
18
18
  tables: list[Table] = Field(default_factory=list)
19
19
  properties: dict[str, Any] = Field(default_factory=dict)
20
+ comment: str | None = None
20
21
 
21
22
  @classmethod
22
23
  def read_instance_from_file(
@@ -74,3 +75,21 @@ class Schema(ReadInstancesMixin):
74
75
  raise ValueError(f"Table {table_name} not found in {self.catalog}.{self.name} metadata.")
75
76
 
76
77
  return table
78
+
79
def add_table(self, table: Table) -> None:
    """Adds a table to the schema and sets the table identifier accordingly.

    The identifier of the table is overwritten with
    '<schema catalog>.<schema name>.<table name>' before the table is
    appended to the schema's `tables` list.

    Args:
        table: A Table object that is added to the Schema tables.
    """
    table.identifier = f"{self.catalog}.{self.name}.{table.name}"
    self.tables.append(table)
87
+
88
def add_tables(self, tables: list[Table]) -> None:
    """Register several tables on the schema, one after another.

    Each table is handed to `add_table` in the order given.

    Args:
        tables: The Table objects to add to the Schema tables.
    """
    for new_table in tables:
        self.add_table(new_table)
@@ -24,11 +24,14 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
24
24
  is_external: bool | None = None
25
25
  partition_by: list[str] = Field(default_factory=list)
26
26
  liquid_clustering: bool | None = None
27
+ composite_primary_key: list[str] = Field(default_factory=list)
27
28
  properties: dict[str, str] = Field(default_factory=dict)
28
29
  constraints: list[Constraint] = Field(default_factory=list)
29
30
  foreign_keys: list[ForeignKey] = Field(default_factory=list)
30
31
  storage_path: Path | None = None
32
+ business_properties: dict[str, str] = Field(default_factory=dict)
31
33
  comment: str | None = None
34
+ data_source_format: str | None = None
32
35
 
33
36
  def model_post_init(self, __context: Any) -> None:
34
37
  """Post init method for the Table model."""
@@ -87,10 +90,8 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
87
90
 
88
91
  @model_validator(mode="after")
89
92
  def _validate_is_external(cls, table: Self):
90
- """If is_external is set to False, storage_path has to be None."""
91
- if not table.is_external and table.storage_path is not None:
92
- raise ValueError("is_external cannot be false while storage_path is set.")
93
- elif table.is_external and table.storage_path is None:
93
+ """If is_external is set to True, storage_path has to be set."""
94
+ if table.is_external and table.storage_path is None:
94
95
  raise ValueError("is_external cannot be true while storage_path is None.")
95
96
 
96
97
  @classmethod
@@ -222,7 +223,7 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
222
223
 
223
224
  def get_create_statement(
224
225
  self,
225
- templates: Path = Path("./templates"),
226
+ templates: Path = Path("./src/cloe_nessy/models/templates/"),
226
227
  template_name: str = "create_table.sql.j2",
227
228
  replace: bool = True,
228
229
  ):
@@ -234,3 +235,47 @@ class Table(TemplateLoaderMixin, ReadInstancesMixin, LoggerMixin):
234
235
  raise err
235
236
  render = template.render(table=self, replace=replace)
236
237
  return render
238
+
239
def get_column_by_name(self, column_name: str) -> Column | None:
    """Look up a column of the table by its name.

    Args:
        column_name: The name of the column to look for.

    Returns:
        The first column whose name equals `column_name`, or None when the
        table has no such column.
    """
    matches = (candidate for candidate in self.columns if candidate.name == column_name)
    return next(matches, None)
252
+
253
def update_column(self, column: Column) -> None:
    """Swap an existing column for a new Column object.

    The column is first passed to `remove_column` and then to `add_column`,
    in that order.

    Args:
        column: The replacement column object.
    """
    self.remove_column(column)
    self.add_column(column)
261
+
262
def add_column(self, column: Column) -> None:
    """Adds a column to the table.

    The column is appended to the end of the table's `columns` list; no
    check for an existing column of the same name is made.

    Args:
        column: The column to be added.
    """
    self.columns.append(column)
269
+
270
def remove_column(self, column: str | Column) -> None:
    """Drop every column with the given name from the Table.

    Args:
        column: The column to be removed, given either as a Column object
            or as the column name.
    """
    target_name = column.name if isinstance(column, Column) else column
    remaining = []
    for existing in self.columns:
        if existing.name != target_name:
            remaining.append(existing)
    self.columns = remaining
File without changes
@@ -0,0 +1,67 @@
1
+ from pathlib import Path
2
+ from typing import Any
3
+
4
+ from jinja2 import TemplateNotFound
5
+ from pydantic import BaseModel, field_validator
6
+
7
+ from ..logging import LoggerMixin
8
+ from .mixins.template_loader_mixin import TemplateLoaderMixin
9
+
10
+
11
class Volume(TemplateLoaderMixin, LoggerMixin, BaseModel):
    """Volume class for managing volumes.

    A volume is addressed by a three-part identifier
    'catalog.schema.volume_name', from which the catalog, schema and volume
    names are derived.
    """

    # Fully qualified name in the format 'catalog.schema.volume_name'.
    identifier: str
    # Storage location of the volume.
    storage_path: str | Path
    # Optional comment for the volume.
    comment: str | None = None

    @field_validator("identifier")
    @classmethod
    def check_identifier(cls, value):
        """Check that the identifier consists of exactly three dot-separated parts.

        Args:
            value: The identifier to validate.

        Returns:
            The unchanged identifier.

        Raises:
            ValueError: If the identifier does not contain exactly two dots.
        """
        if value.count(".") != 2:
            raise ValueError("The identifier must be in the format 'catalog.schema.volume_name'.")
        return value

    @property
    def storage_identifier(self) -> str:
        """Return the storage identifier in the form '/Volumes/<catalog>/<schema>/<name>/'."""
        # Use `schema_name`: `self.schema` is not a property of this class and
        # resolves to pydantic's deprecated `BaseModel.schema` method instead.
        return f"/Volumes/{self.catalog}/{self.schema_name}/{self.name}/"

    @property
    def catalog(self) -> str:
        """Return the catalog name."""
        return self.identifier.split(".")[0]

    @property
    def schema_name(self) -> str:
        """Return the schema name."""
        return self.identifier.split(".")[1]

    @property
    def name(self) -> str:
        """Return the volume name."""
        return self.identifier.split(".")[2]

    @property
    def escaped_identifier(self) -> str:
        """Return the identifier with each part wrapped in backticks."""
        return f"`{self.catalog}`.`{self.schema_name}`.`{self.name}`"

    def model_post_init(self, __context: Any) -> None:
        """Post init method for the Volume model; sets up the console logger."""
        self._console_logger = self.get_console_logger()
        self._console_logger.debug(f"Model for volume [ '{self.identifier}' ] has been initialized.")

    def get_create_statement(
        self,
        # NOTE(review): the default is relative to the working directory and
        # points into a source tree - confirm it resolves for installed packages.
        templates: Path = Path("./src/cloe_nessy/models/templates/"),
        template_name: str = "create_volume.sql.j2",
    ):
        """Get the create statement for the Volume.

        Args:
            templates: The directory containing the templates.
            template_name: The file name of the template to render.

        Returns:
            The template rendered with this volume passed as `volume`.

        Raises:
            TemplateNotFound: If the template cannot be found; the error is
                logged before it is re-raised.
        """
        try:
            template = self.get_template(templates, template_name)
        except TemplateNotFound:
            self._console_logger.error(f"Template [ {template_name} ] not found.")
            raise
        return template.render(volume=self)
@@ -1,3 +1,8 @@
1
- from .table_manager import TableManager
1
+ from .table_manager import TableManager, table_log_decorator
2
+ from .volume_manager import VolumeManager
2
3
 
3
- __all__ = ["TableManager"]
4
+ __all__ = [
5
+ "TableManager",
6
+ "table_log_decorator",
7
+ "VolumeManager",
8
+ ]