sandwich 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sandwich/dialects/base.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from typing import Tuple
3
3
 
4
- from sqlalchemy import Table
4
+ from sqlalchemy import Table, TextClause
5
5
 
6
6
  class DialectHandler(ABC):
7
7
  @abstractmethod
@@ -64,7 +64,16 @@ class DialectHandler(ABC):
64
64
  pass
65
65
 
66
66
  @abstractmethod
67
- def make_sat_proc(
67
+ def make_link_proc(
68
+ self,
69
+ link_table: Table,
70
+ hk_keys: list,
71
+ header: str
72
+ ) -> Tuple[str, str, str]:
73
+ pass
74
+
75
+ @abstractmethod
76
+ def make_scd2_sat_proc(
68
77
  self,
69
78
  sat_table: Table,
70
79
  hk_name: str,
@@ -74,25 +83,14 @@ class DialectHandler(ABC):
74
83
  stg_schema: str,
75
84
  header: str
76
85
  ) -> Tuple[str, str, str]:
77
- """Generate satellite population procedure.
78
-
79
- Args:
80
- sat_table: SQLAlchemy Table object for satellite
81
- hk_name: Hash key column name
82
- hashdiff_col: Hash diff column name
83
- is_available_col: Is available column name
84
- loaddate_col: Load date column name
85
- columns_list: Comma-separated list of columns
86
- stg_schema: Staging schema name ('stg' or 'proxy')
87
- header: Auto-generated header comment
86
+ pass
88
87
 
89
- Returns:
90
- Tuple of (procedure_code, procedure_name)
91
- """
88
+ @abstractmethod
89
+ def make_scd0_sat_proc(self, sat_table: Table, header: str) -> Tuple[str, str, str]:
92
90
  pass
93
91
 
94
92
  @abstractmethod
95
- def make_dim_scd2_proc(
93
+ def make_scd2_dim_proc(
96
94
  self,
97
95
  dim_table: Table,
98
96
  bk_keys: list,
@@ -115,10 +113,7 @@ class DialectHandler(ABC):
115
113
  def make_job_proc(
116
114
  self,
117
115
  entity_name: str,
118
- hub_proc_name: str,
119
- sat_proc_name: str,
120
- dim_proc_name: str,
121
- stg_proc_name: str | None,
116
+ proc_names: list[str],
122
117
  header: str
123
118
  ) -> Tuple[str, str, str]:
124
119
  """Generate main job orchestration procedure.
@@ -137,30 +132,5 @@ class DialectHandler(ABC):
137
132
  pass
138
133
 
139
134
  @abstractmethod
140
- def make_drop_proc(
141
- self,
142
- entity_name: str,
143
- stg_schema: str,
144
- job_proc_name: str,
145
- stg_proc_name: str | None,
146
- hub_proc_name: str,
147
- sat_proc_name: str,
148
- dim_proc_name: str,
149
- header: str
150
- ) -> Tuple[str, str, str]:
151
- """Generate cleanup/drop procedure for all entity objects.
152
-
153
- Args:
154
- entity_name: Entity name
155
- stg_schema: Staging schema name ('stg' or 'proxy')
156
- job_proc_name: Name of job orchestration procedure
157
- stg_proc_name: Name of staging materialization procedure (optional)
158
- hub_proc_name: Name of hub population procedure
159
- sat_proc_name: Name of satellite population procedure
160
- dim_proc_name: Name of dimension recalculation procedure
161
- header: Auto-generated header comment
162
-
163
- Returns:
164
- Tuple of (procedure_code, procedure_name)
165
- """
166
- pass
135
+ def make_drop_proc(self, entity_name, table_schemas: list[str], procedures: list[str], header: str) \
136
+ -> Tuple[str, str, str]: ...
@@ -15,33 +15,6 @@ begin
15
15
  end
16
16
  """
17
17
 
18
- # language=sql
19
- create_proc_register_entity = """
20
- create or alter proc core.register_entity (
21
- @entity_name varchar(100),
22
- @template varchar(50)
23
- ) as
24
- begin
25
- set nocount on;
26
-
27
- if exists (
28
- select *
29
- from core.[entities]
30
- where [entity_name] = @entity_name
31
- )
32
- begin
33
- update core.[entities]
34
- set [updated] = sysdatetime(), [is_deleted] = 0
35
- where [entity_name] = @entity_name
36
- end
37
- else begin
38
- insert into core.[entities]
39
- ([entity_name], [template])
40
- values (@entity_name, @template)
41
- end
42
- end
43
- """
44
-
45
18
  # language=sql
46
19
  create_table_ExecutionLog = """
47
20
  if object_id('core.ExecutionLog') is null
@@ -101,6 +74,8 @@ if schema_id('hub') is null
101
74
  exec ('create schema hub')
102
75
  if schema_id('sat') is null
103
76
  exec ('create schema sat')
77
+ if schema_id('link') is null
78
+ exec ('create schema link')
104
79
  if schema_id('dim') is null
105
80
  exec ('create schema dim')
106
81
  if schema_id('fact') is null
@@ -17,24 +17,6 @@ CREATE TABLE IF NOT EXISTS core.entities (
17
17
  );
18
18
  """
19
19
 
20
- # language=sql
21
- create_proc_register_entity = """
22
- CREATE OR REPLACE PROCEDURE core.register_entity(
23
- p_entity_name VARCHAR(100),
24
- p_gen_path VARCHAR(50)
25
- )
26
- LANGUAGE plpgsql
27
- AS $$
28
- BEGIN
29
- INSERT INTO core.entities (entity_name, template)
30
- VALUES (p_entity_name, p_gen_path)
31
- ON CONFLICT (entity_name) DO UPDATE
32
- SET updated = NOW(),
33
- is_deleted = FALSE;
34
- END;
35
- $$;
36
- """
37
-
38
20
  # language=sql
39
21
  create_func_StringToHash= """
40
22
  create or replace function core.string_to_hash1(str_value text)
@@ -1,7 +1,11 @@
1
1
  """MSSQL dialect handler for SQL code generation."""
2
2
  from typing import Tuple
3
3
 
4
- from sqlalchemy import dialects, Table
4
+ from sqlalchemy import dialects, Table, text
5
+
6
+ #from sandwich import SANDWICH_VERSION
7
+ #from sandwich.modeling import modeling_metadata
8
+ #from sandwich.modeling.strategies.base import ValidationResult
5
9
 
6
10
  from .base import DialectHandler
7
11
  from .utils import get_columns_list
@@ -103,7 +107,47 @@ end
103
107
  proc_code = self.apply_proc_template(proc_name, proc_body, header)
104
108
  return proc_code, proc_name, f"exec {proc_name}"
105
109
 
106
- def make_sat_proc(
110
+ def make_link_proc(self, link_table: Table, hk_keys: list, header: str) -> Tuple[str, str, str]:
111
+ proc_name = self.get_proc_name_format("elt", f"Populate_{link_table.schema}", link_table.name)
112
+ where_fields_list_str = "\n\t\tand ".join([f"link.[{hk[0]}] = stg.[{hk[0]}]" for hk in hk_keys if hk[0] != f"hk_{link_table.name}"])
113
+ columns_list = get_columns_list(link_table)
114
+
115
+ # language=sql
116
+ proc_body = f"""
117
+ insert into [{link_table.schema}].[{link_table.name}]
118
+ ({columns_list})
119
+ select distinct {get_columns_list(link_table, alias="stg")}
120
+ from stg.[{link_table.name}] as stg
121
+ where not exists (
122
+ select *
123
+ from [{link_table.schema}].[{link_table.name}] as link
124
+ where {where_fields_list_str}
125
+ );
126
+ """
127
+ proc_code = self.apply_proc_template(proc_name, proc_body, header)
128
+ return proc_code, proc_name, f"exec {proc_name}"
129
+
130
+ def make_scd0_sat_proc(self, sat_table: Table, header: str) -> Tuple[str, str, str]:
131
+ proc_name = self.get_proc_name_format("elt", f"Populate_{sat_table.schema}", sat_table.name)
132
+ columns_list = get_columns_list(sat_table)
133
+ hk_name = f"hk_{sat_table.name}"
134
+
135
+ # language=sql
136
+ proc_body = f"""
137
+ insert into [{sat_table.schema}].[{sat_table.name}]
138
+ ({columns_list})
139
+ select {get_columns_list(sat_table, alias="stg")}
140
+ from stg.[{sat_table.name}] stg
141
+ where not exists (
142
+ select *
143
+ from sat.[{sat_table.name}] sat
144
+ where stg.[{hk_name}] = sat.[{hk_name}]
145
+ )
146
+ """
147
+ proc_code = self.apply_proc_template(proc_name, proc_body, header)
148
+ return proc_code, proc_name, f"exec {proc_name}"
149
+
150
+ def make_scd2_sat_proc(
107
151
  self,
108
152
  sat_table: Table,
109
153
  hk_name: str,
@@ -175,12 +219,7 @@ end
175
219
  proc_code = self.apply_proc_template(proc_name, proc_body, header)
176
220
  return proc_code, proc_name, f"exec {proc_name}"
177
221
 
178
- def make_dim_scd2_proc(
179
- self,
180
- dim_table: Table,
181
- bk_keys: list,
182
- header: str
183
- ) -> Tuple[str, str, str]:
222
+ def make_scd2_dim_proc(self, dim_table: Table, bk_keys: list, header: str) -> Tuple[str, str, str]:
184
223
  proc_name = self.get_proc_name_format("elt", f"Recalculate_{dim_table.schema}", dim_table.name)
185
224
  columns_list = get_columns_list(dim_table)
186
225
  pk_keys = lambda: ", ".join([f"sat.[{bk[0]}]" for bk in bk_keys])
@@ -210,62 +249,33 @@ end
210
249
  proc_code = self.apply_proc_template(proc_name, proc_body, header)
211
250
  return proc_code, proc_name, f"exec {proc_name}"
212
251
 
213
- def make_job_proc(
214
- self,
215
- entity_name: str,
216
- hub_proc_name: str,
217
- sat_proc_name: str,
218
- dim_proc_name: str,
219
- stg_proc_name: str | None,
220
- header: str
221
- ) -> Tuple[str, str, str]:
222
- """Generate MSSQL job orchestration procedure."""
252
+ def make_job_proc(self, entity_name: str, proc_names: list[str], header: str) -> Tuple[str, str, str]:
223
253
  proc_name = f"[job].[Run_all_related_to_{entity_name}]"
254
+ proc_body = "\n\t"
255
+ for proc in proc_names:
256
+ if proc is None: continue
257
+ proc_body += f"exec {proc} @executionID;\n\t"
224
258
 
225
- stg_call = f" exec {stg_proc_name} @executionID;\n" if stg_proc_name else ""
226
-
227
- # language=sql
228
- proc_body = f"""
229
- {stg_call} exec {hub_proc_name} @executionID;
230
- exec {sat_proc_name} @executionID;
231
- exec {dim_proc_name} @executionID;
232
- """
233
259
  proc_code = self.apply_proc_template(proc_name, proc_body, header)
234
260
  return proc_code, proc_name, f"exec {proc_name}"
235
261
 
236
- def make_drop_proc(
237
- self,
238
- entity_name: str,
239
- stg_schema: str,
240
- job_proc_name: str,
241
- stg_proc_name: str | None,
242
- hub_proc_name: str,
243
- sat_proc_name: str,
244
- dim_proc_name: str,
245
- header: str
246
- ) -> Tuple[str, str, str]:
247
- """Generate MSSQL cleanup/drop procedure."""
262
+ def make_drop_proc(self, entity_name, table_schemas: list[str], procedures: list[str], header: str) -> Tuple[str, str, str]:
248
263
  proc_name = f"[meta].[Drop_all_related_to_{entity_name}]"
249
-
250
- stg_drops = f"""
251
- drop table if exists [stg].[{entity_name}];
252
- drop procedure if exists {stg_proc_name};
253
- """ if stg_schema == "proxy" else ""
254
-
255
- # language=sql
256
- proc_body = f"""{stg_drops}
257
- drop table if exists [dim].[{entity_name}];
258
- drop procedure if exists {dim_proc_name};
259
- drop table if exists [sat].[{entity_name}];
260
- drop procedure if exists {sat_proc_name};
261
- drop table if exists [hub].[{entity_name}];
262
- drop procedure if exists {hub_proc_name};
263
- drop procedure if exists {job_proc_name};
264
-
265
- update core.[entities]
264
+ proc_body = "\n\t"
265
+ for proc in procedures:
266
+ if proc is None: continue
267
+ proc_body += f"drop procedure if exists {proc};\n\t"
268
+ proc_body += "\n\t"
269
+ for schema in table_schemas:
270
+ proc_body += f"drop table if exists [{schema}].[{entity_name}];\n\t"
271
+ proc_body += "\n\t"
272
+ proc_body += \
273
+ f"""update core.[entities]
266
274
  set [deleted] = sysdatetime()
267
275
  , [is_deleted] = 1
268
276
  where [entity_name] = '{entity_name}'
269
277
  """
278
+
270
279
  proc_code = self.apply_proc_template(proc_name, proc_body, header)
271
280
  return proc_code, proc_name, f"exec {proc_name}"
281
+
@@ -1,7 +1,6 @@
1
- """Postgres dialect handler for SQL code generation."""
2
1
  from typing import Tuple
3
2
 
4
- from sqlalchemy import dialects, Table
3
+ from sqlalchemy import dialects, Table, text
5
4
 
6
5
  from src.sandwich.dialects.base import DialectHandler
7
6
 
@@ -49,7 +48,7 @@ class PostgresDialectHandler(DialectHandler):
49
48
  # TODO: Implement using INSERT...ON CONFLICT or NOT EXISTS pattern
50
49
  raise NotImplementedError("Postgres hub procedure not yet implemented")
51
50
 
52
- def make_sat_proc(
51
+ def make_sdc2_sat_proc(
53
52
  self,
54
53
  sat_table: Table,
55
54
  hk_name: str,
@@ -65,7 +64,7 @@ class PostgresDialectHandler(DialectHandler):
65
64
  # Use BOOLEAN type instead of BIT
66
65
  raise NotImplementedError("Postgres satellite procedure not yet implemented")
67
66
 
68
- def make_dim_scd2_proc(
67
+ def make_scd2_dim_proc(
69
68
  self,
70
69
  dim_table: Table,
71
70
  bk_keys: list,
@@ -89,7 +89,6 @@ def initialize_database(conn: Engine | Connection, dialect: str = "mssql",
89
89
  if drop_entities_table:
90
90
  init_scripts["drop_entities_table"] = "drop table if exists [core].[entities];"
91
91
  init_scripts["create_entities_table"] = ddl_mssql.create_entities_table
92
- init_scripts["create_proc_register_entity"] = header + ddl_mssql.create_proc_register_entity
93
92
  init_scripts["create_func_StringToHash1"] = header + ddl_mssql.create_func_StringToHash
94
93
  for i in range(2, str_to_hash_count):
95
94
  init_scripts[f"create_func_StringToHash{i}"] = header + get_string_to_hash_ddl_mssql(i)
@@ -102,7 +101,6 @@ def initialize_database(conn: Engine | Connection, dialect: str = "mssql",
102
101
  if drop_entities_table:
103
102
  init_scripts["drop_entities_table"] = "drop table if exists core.entities"
104
103
  init_scripts["create_entities_table"] = ddl_postgres.create_entities_table
105
- init_scripts["create_proc_register_entity"] = ddl_postgres.create_proc_register_entity
106
104
  init_scripts["create_func_StringToHash1"] = ddl_postgres.create_func_StringToHash
107
105
  for i in range(2, str_to_hash_count):
108
106
  init_scripts[f"create_func_StringToHash{i}"] = get_string_to_hash_ddl_postgres(i)
@@ -146,4 +144,4 @@ def parse_auto_generated_header(full_proc_text: str) -> dict[str, Any]:
146
144
  started = True
147
145
  continue
148
146
  result["rows_in_header"] = rows_in_header - 1 if rows_in_header > 0 else 0
149
- return result
147
+ return result
@@ -0,0 +1,82 @@
1
+ from datetime import datetime
2
+
3
+ from sqlalchemy import Connection, Engine, MetaData, select, Table, text, RowMapping, Sequence
4
+
5
+ from sandwich.dialects import DialectHandlerFactory
6
+ from sandwich.modeling import get_stg_info, infer_template, Dv2SystemInfo, Dv2Entity, StgInfo
7
+ from sandwich.modeling.strategies import SchemaGenerator, StrategyFactory
8
+
9
+
10
def _register_entity(entity_name: str, template: str, conn: Engine | Connection,
                     verbose: bool = False) -> datetime:
    """Insert or refresh the ``core.entities`` row for ``entity_name``.

    The ``core.entities`` table is reflected from ``conn`` on every call.

    Args:
        entity_name: Value matched against / stored in ``entity_name``.
        template: Value stored in ``template`` when a new row is inserted.
        conn: Object used for reflection and statement execution.
        verbose: When true, print a confirmation line.

    Returns:
        The ``created`` value already stored for the entity, or the
        timestamp generated for the newly inserted row.
    """
    registry = Table("entities", MetaData(), schema="core", autoload_with=conn)
    lookup = select(registry.c.created).where(entity_name == registry.c.entity_name)
    created_on = conn.execute(lookup).scalar_one_or_none()

    # Known entity: only refresh its bookkeeping columns.
    if created_on is not None:
        _update_entity(entity_name, conn, registry, verbose=verbose)
        return created_on

    # No usable `created` value found: insert a fresh row stamped with "now".
    created_on = datetime.now()
    new_row = registry.insert().values(entity_name=entity_name, template=template, created=created_on)
    conn.execute(new_row)
    if verbose:
        print(f"[ok] Registered `{entity_name}` for `{template}`")
    return created_on
24
+
25
def _update_entity(entity_name: str, conn: Engine | Connection, sys_entities: Table, verbose: bool = False) -> None:
    """Mark the registry row of ``entity_name`` as updated and not deleted.

    Args:
        entity_name: Value matched against ``sys_entities.c.entity_name``.
        conn: Object used to execute the UPDATE statement.
        sys_entities: Table object for the entities registry.
        verbose: When true, print a confirmation line.
    """
    matches_entity = entity_name == sys_entities.c.entity_name
    touch = sys_entities.update().where(matches_entity).values(updated=datetime.now(), is_deleted=False)
    conn.execute(touch)
    if verbose:
        print(f"[ok] Updated `{entity_name}`")
30
+
31
+
32
def generate_schema(schema_generator: SchemaGenerator, registered_on: datetime, conn: Engine | Connection,
                    verbose: bool = False) -> None:
    """Create the generator's tables, then create or alter its procedures.

    Args:
        schema_generator: Supplies ``make_tables()`` and ``make_procedures()``.
        registered_on: Passed to ``make_procedures`` as the registration date.
        conn: Object used to create tables and execute procedure code.
        verbose: When true, print one line per created table / procedure.
    """
    tables = schema_generator.make_tables()
    for table in tables.values():
        # Entries may be None; those are simply skipped.
        if table is None:
            continue
        table.create(conn, checkfirst=True)
        if verbose:
            print(f"[ok] Created table [{table.schema}].[{table.name}]")

    procedures = schema_generator.make_procedures(tables, registered_on)
    # Each value is a 3-tuple; only the code and the name are used here.
    for proc_code, proc_name, _ in procedures.values():
        conn.execute(text(proc_code))
        if verbose:
            print(f"[ok] Created or altered {proc_name}")
46
+
47
+
48
def _generate_schema_for_entity(stg_info: StgInfo, conn: Engine | Connection, dialect: str,
                                registered_on: datetime, template: str | None, verbose: bool = False) -> None:
    """Validate staging info and generate the matching schema objects.

    Args:
        stg_info: Staging description handed to the validator.
        conn: Object used to read system info and to create objects.
        dialect: Passed to ``DialectHandlerFactory.create_handler``.
        registered_on: Registration date forwarded to ``generate_schema``.
        template: Passed to ``StrategyFactory.create_validator``.
        verbose: Forwarded to ``generate_schema``.
    """
    validator = StrategyFactory.create_validator(template)
    validated = validator.validate_staging(stg_info, get_system_info(conn))
    generator = StrategyFactory.create_generator(DialectHandlerFactory.create_handler(dialect), validated)
    generate_schema(generator, registered_on, conn, verbose=verbose)
56
+
57
+
58
def register_and_create_entity(entity_name: str, conn: Engine | Connection, dialect: str, template: str | None = None,
                               schema: str = "stg", verbose: bool = False) -> None:
    """Register a staging entity and generate its tables and procedures.

    Args:
        entity_name: Name of the staging object; also the registry key.
        conn: Object used for reflection and statement execution.
        dialect: Passed on to select the dialect handler.
        template: Template name; inferred with ``infer_template`` when None.
        schema: Schema the staging object is read from.
        verbose: When true, progress lines are printed by every step,
            including the registration step.
    """
    stg_info = get_stg_info(entity_name, schema, conn)
    if template is None:
        template = infer_template(stg_info)
    # Forward `verbose`: previously it was dropped here, so the
    # "[ok] Registered ..." message could never be printed.
    registered_on = _register_entity(entity_name, template, conn, verbose=verbose)
    _generate_schema_for_entity(stg_info, conn, dialect, registered_on, template, verbose=verbose)
65
+
66
+
67
def update_registered_entities(conn: Engine | Connection, dialect: str, schema: str = "stg",
                               verbose: bool = False) -> None:
    """Regenerate schema objects for every entity returned by ``get_system_info``.

    Args:
        conn: Object used for reflection and statement execution.
        dialect: Passed on to select the dialect handler.
        schema: Schema the staging objects are read from.
        verbose: Forwarded to the update and generation steps.
    """
    system = get_system_info(conn)
    registry = system.sys_entities
    for entity in system.entities_list:
        name = entity.entity_name
        staging = get_stg_info(name, schema, conn)
        _update_entity(name, conn, registry, verbose=verbose)
        _generate_schema_for_entity(staging, conn, dialect, entity.created_on, entity.template, verbose=verbose)
74
+
75
+
76
def get_system_info(conn: Engine | Connection):
    """Load the non-deleted rows of ``core.entities``.

    Returns:
        A ``Dv2SystemInfo`` holding one ``Dv2Entity`` per selected row
        together with the reflected ``core.entities`` table object.
    """
    sys_entities = Table("entities", MetaData(), schema="core", autoload_with=conn)
    active = sys_entities.select().where(~sys_entities.c.is_deleted)
    rows = conn.execute(active).mappings().all()

    entities = []
    for row in rows:
        entities.append(Dv2Entity(row["entity_name"], row["template"], row["created"]))
    return Dv2SystemInfo(entities, sys_entities)
@@ -1,7 +1,15 @@
1
1
  from dataclasses import dataclass
2
- from typing import Any
2
+ from datetime import datetime
3
+ from typing import Any, Tuple
4
+
5
+ from sqlalchemy import Table, Engine, Connection, MetaData
6
+
7
@dataclass(frozen=True)
class Dv2Entity:
    """Immutable record describing one registered entity."""

    # Name of the entity.
    entity_name: str
    # Template name associated with the entity.
    template: str
    # Timestamp of the entity's registration.
    created_on: datetime
3
12
 
4
- from sqlalchemy import Table
5
13
 
6
14
  @dataclass(frozen=True)
7
15
  class StgInfo:
@@ -11,6 +19,12 @@ class StgInfo:
11
19
  bk_keys: dict[str, Any]
12
20
  sys_columns: dict[str, Any]
13
21
  bus_columns: dict[str, Any]
22
+ degenerate_field: Tuple[str, Any] | None = None
23
+
24
@dataclass(frozen=True)
class Dv2SystemInfo:
    """Immutable bundle of the registered entities and their registry table."""

    # Registered entities.
    entities_list: list[Dv2Entity]
    # Table object for the entities registry.
    sys_entities: Table
14
28
 
15
29
  class Dv2ModelingMetadata:
16
30
  HEADER_TEMPLATE = """/*
@@ -58,24 +72,26 @@ class Dv2ModelingMetadata:
58
72
  #self.column_types = self._dialects_config[self.dialect]
59
73
  self.required_columns: list[str] = [self.loaddate, self.recordsource]
60
74
 
61
-
62
75
  modeling_metadata = Dv2ModelingMetadata()
63
76
 
64
- def get_stg_info(stg: Table) -> StgInfo:
77
+ def get_stg_info(entity_name: str, schema: str, conn: Engine | Connection) -> StgInfo:
78
+ stg = Table(entity_name, MetaData(), schema=schema, autoload_with=conn)
79
+
65
80
  hk_keys: dict[str, Any] = {}
66
81
  bk_keys: dict[str, Any] = {}
67
82
  sys_columns: dict[str, Any] = {}
68
83
  bus_columns: dict[str, Any] = {}
84
+ degenerate_field: Tuple[str, Any] | None = None
69
85
 
70
86
  for col in stg.columns.values():
71
- if col.name.startswith("hk_"):
87
+ if col.name.startswith("hk_"): # hash key
72
88
  hk_keys[col.name] = col.type
73
- elif col.name.startswith("bk_"):
89
+ elif col.name.startswith("bk_"): # business key
74
90
  bk_keys[col.name] = col.type
75
- elif col.name.startswith("sg_"):
91
+ elif col.name.startswith("dg_"): # degenerate field (transactional links only)
92
+ degenerate_field = (col.name, col.type)
93
+ elif col.name.startswith("sg_"): # surrogate key
76
94
  raise Exception(f"sg column '{col.name}' is not implemented yet")
77
- elif col.name.startswith("ts_"):
78
- raise Exception(f"ts column '{col.name}' is not implemented yet")
79
95
  elif col.name in modeling_metadata.names:
80
96
  # type_name = metadata.column_types[col.name]
81
97
  # if not str(col.type).startswith(type_name):
@@ -91,6 +107,7 @@ def get_stg_info(stg: Table) -> StgInfo:
91
107
  bk_keys=bk_keys,
92
108
  sys_columns=sys_columns,
93
109
  bus_columns=bus_columns,
110
+ degenerate_field=degenerate_field,
94
111
  )
95
112
 
96
113
  def infer_template(stg_info: StgInfo):
@@ -0,0 +1,94 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from typing import Any, Callable, Tuple
5
+
6
+ from sqlalchemy import Table
7
+
8
+ from sandwich.modeling import Dv2SystemInfo, modeling_metadata, StgInfo
9
+
10
+ @dataclass(frozen=True)
11
+ class ValidationResult:
12
+ stg_schema: str
13
+ entity_name: str
14
+ bk_keys: list[Tuple[str, Any]]
15
+ hk_keys: list[Tuple[str, Any]]
16
+ business_column_types: dict[str, Any]
17
+ system_column_types: dict[str, Any]
18
+ template: str
19
+ degenerate_field: Tuple[str, Any] | None = None
20
+
21
class Validator(ABC):
    """Interface for objects that validate a staging description."""

    @abstractmethod
    def validate_staging(self, stg_info: StgInfo, sys_info: Dv2SystemInfo, verbose: bool = False) -> ValidationResult:
        """Validate ``stg_info`` against ``sys_info`` and return the result."""
25
+
26
class BaseValidator(Validator):
    """Validator performing the checks shared by every template.

    ``_on_validate_staging`` is an optional hook called with the staging and
    system info after the shared checks (presumably assigned by
    template-specific subclasses; confirm against them).
    """

    def __init__(self, template: str):
        self._on_validate_staging: Callable[[StgInfo, Dv2SystemInfo], None] | None = None
        self.template = template

    def validate_staging(self, stg_info: StgInfo, sys_info: Dv2SystemInfo, verbose: bool = False) -> ValidationResult:
        """Validate a staging table or view and build its ValidationResult.

        Raises:
            Exception: if ``verbose`` is requested (not implemented yet), or
                if a required system column is missing from the staging object.
        """
        if verbose:
            raise Exception("verbose is not implemented yet")

        present_sys_columns = stg_info.sys_columns.keys()

        # Universal check: every required audit column must be present.
        for column in modeling_metadata.required_columns:
            if column not in present_sys_columns:
                raise Exception(f"{column} column is required")

        # Optional extra validation step.
        hook = self._on_validate_staging
        if hook is not None:
            hook(stg_info, sys_info)

        # todo: ValidationResult is not required whatsoever
        return ValidationResult(
            stg_schema=stg_info.stg_schema,
            entity_name=stg_info.stg_name,
            bk_keys=list(stg_info.bk_keys.items()),
            hk_keys=list(stg_info.hk_keys.items()),
            degenerate_field=stg_info.degenerate_field,
            business_column_types=stg_info.bus_columns,
            system_column_types=stg_info.sys_columns,
            template=self.template,
        )
59
+
60
+
61
+
62
class SchemaGenerator(ABC):
    """Interface for objects that produce an entity's tables and procedures."""

    @abstractmethod
    def make_tables(self) -> dict[str, Table]:
        """Return the tables to create, keyed by a table-type label."""

    @abstractmethod
    def make_procedures(
        self,
        tables: dict[str, Table],
        entity_registration_date: datetime | None = None,
    ) -> dict[str, Tuple[str, str, str]]:
        """Return the procedures to create, keyed by a procedure-type label.

        Args:
            tables: The mapping produced by ``make_tables``.
            entity_registration_date: When the entity was registered.
                Defaults to ``None`` instead of ``datetime.now()``: a call in
                the signature is evaluated once, when the class is defined,
                which would freeze the timestamp at import time.
                Implementations should substitute the current time when
                ``None`` is received.

        Returns:
            A mapping whose values are 3-tuples; the first two items are the
            procedure code and the procedure name (the third is presumably
            the call statement; confirm against the dialect handlers).
        """
74
+
75
+ # class BaseSchemaGenerator(SchemaGenerator):
76
+ # def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
77
+ # self.dialect_handler = dialect_handler
78
+ # self._validation_result = validation_result
79
+ # self._on_make_proc: Callable[[Table, datetime], Tuple[str, str, str]] | None = None
80
+ #
81
+ # def make_proc(self, tbl: Table, entity_registration_date: datetime) -> Tuple[str, str, str]:
82
+ # header = modeling_metadata.HEADER_TEMPLATE.format(
83
+ # created_on=entity_registration_date,
84
+ # updated_on=datetime.now(),
85
+ # version=SANDWICH_VERSION,
86
+ # entity_name=self._validation_result.entity_name
87
+ # )
88
+ #
89
+ # if self._validation_result.stg_schema == "proxy":
90
+ # stg_proc_code, stg_proc_name, stg_call_stmt = self.dialect_handler.make_stg_materialization_proc(
91
+ # entity_name=self._validation_result.entity_name,
92
+ # header=header
93
+ # )
94
+ # procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)
@@ -22,10 +22,11 @@ class StrategyFactory:
22
22
  raise ValueError(f"Unknown template '{template}'. Available templates: {available}")
23
23
 
24
24
  validator_class, _ = cls._strategies[template]
25
- return validator_class()
25
+ return validator_class(template)
26
26
 
27
27
  @classmethod
28
- def create_generator(cls, template: str, dialect_handler: DialectHandler, validation_result: ValidationResult) -> SchemaGenerator:
28
+ def create_generator(cls, dialect_handler: DialectHandler, validation_result: ValidationResult) -> SchemaGenerator:
29
+ template = validation_result.template
29
30
  if template not in cls._strategies:
30
31
  available = ", ".join(cls._strategies.keys())
31
32
  raise ValueError(f"Unknown template '{template}'. Available templates: {available}")