sandwich 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,107 @@
1
+ from typing import Tuple
2
+
3
+ from sqlalchemy import dialects, Table, text
4
+
5
+ from src.sandwich.dialects.base import DialectHandler
6
+
7
+
8
class PostgresDialectHandler(DialectHandler):
    """Dialect handler for PostgreSQL.

    NOTE: This is a partial stub. Only ``get_boolean_type`` and
    ``get_proc_name_format`` are implemented; ``apply_proc_template`` and
    every ``make_*`` method raise ``NotImplementedError`` until the
    PostgreSQL templates are written.
    """

    def get_boolean_type(self):
        """Return the SQLAlchemy PostgreSQL ``BOOLEAN`` type class."""
        # Import the dialect package explicitly: ``sqlalchemy.dialects`` only
        # exposes ``postgresql`` as an attribute once that submodule has been
        # imported somewhere, so attribute access alone is not reliable.
        from sqlalchemy.dialects import postgresql

        return postgresql.BOOLEAN

    def get_proc_name_format(self, schema: str, operation: str, entity_name: str) -> str:
        """Get Postgres procedure naming format.

        Returns ``<schema>.<operation lower-cased>_<entity_name>``; only the
        operation part is lower-cased.
        """
        # Postgres uses lowercase with underscores by convention
        return f"{schema}.{operation.lower()}_{entity_name}"

    def apply_proc_template(self, proc_name: str, sql_body: str, header: str) -> str:
        """Wrap SQL body in Postgres procedure template with error handling.

        Raises:
            NotImplementedError: always; not implemented yet.
        """
        # TODO: Implement Postgres procedure template
        # Postgres uses CREATE OR REPLACE PROCEDURE/FUNCTION with PL/pgSQL
        # Error handling uses EXCEPTION blocks
        # Logging integration needed
        raise NotImplementedError("Postgres procedure template not yet implemented")

    def make_stg_materialization_proc(
        self,
        entity_name: str,
        header: str
    ) -> Tuple[str, str]:
        """Generate Postgres staging materialization procedure.

        Raises:
            NotImplementedError: always; not implemented yet.
        """
        # TODO: Implement using CREATE OR REPLACE and DROP/CREATE TABLE pattern
        raise NotImplementedError("Postgres staging materialization not yet implemented")

    def make_hub_proc(
        self,
        hub_table: Table,
        bk_keys: list,
        header: str
    ) -> Tuple[str, str]:
        """Generate Postgres hub population procedure.

        Raises:
            NotImplementedError: always; not implemented yet.
        """
        # TODO: Implement using INSERT...ON CONFLICT or NOT EXISTS pattern
        raise NotImplementedError("Postgres hub procedure not yet implemented")

    # NOTE(review): "sdc2" looks like a typo for "scd2", but the name is part
    # of the handler interface, so it is kept as is - confirm against the base.
    def make_sdc2_sat_proc(
        self,
        sat_table: Table,
        hk_name: str,
        hashdiff_col: str,
        is_available_col: str,
        loaddate_col: str,
        stg_schema: str,
        header: str
    ) -> Tuple[str, str]:
        """Generate Postgres satellite population procedure.

        Raises:
            NotImplementedError: always; not implemented yet.
        """
        # TODO: Implement using CTE and window functions (similar to MSSQL but with Postgres syntax)
        # Use CURRENT_TIMESTAMP instead of SYSDATETIME()
        # Use BOOLEAN type instead of BIT
        raise NotImplementedError("Postgres satellite procedure not yet implemented")

    def make_scd2_dim_proc(
        self,
        dim_table: Table,
        bk_keys: list,
        header: str
    ) -> Tuple[str, str]:
        """Generate Postgres dimension SCD2 recalculation procedure.

        Raises:
            NotImplementedError: always; not implemented yet.
        """
        # TODO: Implement using TRUNCATE and INSERT with window functions
        # Use LAG/LEAD for SCD2 date calculations
        # Use INTERVAL for date arithmetic instead of DATEADD
        raise NotImplementedError("Postgres dimension procedure not yet implemented")

    def make_job_proc(
        self,
        entity_name: str,
        hub_proc_name: str,
        sat_proc_name: str,
        dim_proc_name: str,
        stg_proc_name: str | None,
        header: str
    ) -> Tuple[str, str]:
        """Generate Postgres job orchestration procedure.

        Raises:
            NotImplementedError: always; not implemented yet.
        """
        # TODO: Implement using CALL statements for other procedures
        # Pass execution_id through procedure parameters
        raise NotImplementedError("Postgres job procedure not yet implemented")

    def make_drop_proc(
        self,
        entity_name: str,
        stg_schema: str,
        job_proc_name: str,
        stg_proc_name: str | None,
        hub_proc_name: str,
        sat_proc_name: str,
        dim_proc_name: str,
        header: str
    ) -> Tuple[str, str]:
        """Generate Postgres cleanup/drop procedure.

        Raises:
            NotImplementedError: always; not implemented yet.
        """
        # TODO: Implement using DROP IF EXISTS for tables and procedures
        # Update core.entities with deletion timestamp
        raise NotImplementedError("Postgres drop procedure not yet implemented")
@@ -0,0 +1,147 @@
1
+ from datetime import datetime
2
+ from typing import Any
3
+
4
+ from sqlalchemy import Engine, Connection, Table, text, TextClause
5
+
6
+ from src.sandwich import SANDWICH_VERSION
7
+ from .. import errors as err
8
+ from src.sandwich.modeling import modeling_metadata
9
+
10
+ from . import ddl_mssql, ddl_postgres
11
+
12
def get_columns_list(table: Table, sep: str = ", ", alias: str = None):
    """Join the table's column names, each wrapped in square brackets.

    Args:
        table: object whose ``columns.values()`` yields items with a ``name``.
        sep: separator placed between the bracketed names.
        alias: optional alias; when truthy every name is prefixed with
            ``<alias>.``.

    Returns:
        The joined string, e.g. ``t.[a], t.[b]``.
    """
    prefix = alias + "." if alias else ""
    bracketed = (f"{prefix}[{column.name}]" for column in table.columns.values())
    return sep.join(bracketed)
15
+
16
def get_string_to_hash_ddl_mssql(columns_count: int) -> str:
    """Build the T-SQL DDL for ``[core].[StringToHash<columns_count>]``.

    The generated function takes ``columns_count`` ``nvarchar(1000)``
    parameters, trims each one (NULL becomes an empty string), joins them
    with ``;``, upper-cases the result and returns its SHA-1 as ``char(40)``.

    Raises:
        ValueError: if ``columns_count`` is outside 2..100.
    """
    out_of_range = columns_count < 2 or columns_count > 100
    if out_of_range:
        raise ValueError("columns_count must be between 2 and 100")

    indices = range(1, columns_count + 1)
    parameters = ",\n\t".join(f"@StrValue{idx} nvarchar(1000)" for idx in indices)
    trimmed_values = ", ';',\n\t\t\t".join(
        f"rtrim(ltrim(isnull(@StrValue{idx}, '')))" for idx in indices)

    # language=sql
    return f"""
create or alter function [core].[StringToHash{columns_count}]
(
{parameters}
) returns char(40) as
begin
declare @result char(40);
set @result = upper(convert(char(40), hashbytes('sha1',
upper(concat(
{trimmed_values}
))
), 2));
return @result;
end"""
40
+
41
def get_string_to_hash_ddl_postgres(columns_count: int) -> str:
    """Build the PL/pgSQL DDL for ``core.string_to_hash<columns_count>``.

    The generated function takes ``columns_count`` ``text`` parameters,
    trims and upper-cases each one (NULL becomes an empty string), joins
    them with ``;`` and returns the upper-cased hex SHA-1 as ``char(40)``.
    The SQL calls ``digest``, which must be available in the database.

    Raises:
        ValueError: if ``columns_count`` is outside 2..100.
    """
    out_of_range = columns_count < 2 or columns_count > 100
    if out_of_range:
        raise ValueError("columns_count must be between 2 and 100")

    indices = range(1, columns_count + 1)
    parameters = ",\n\t".join(f"p_str_value{idx} text" for idx in indices)
    normalized_values = ", ';',\n\t\t\t".join(
        f"upper(trim(coalesce(p_str_value{idx}, '')))" for idx in indices)

    # language=sql
    return f"""
create or replace function core.string_to_hash{columns_count}(
{parameters}
) returns char(40)
language plpgsql
as $$
declare
result char(40);
begin
result :=
upper(
encode(
digest(
concat(
{normalized_values}
),
'sha1'
),
'hex'
)
);
return cast(result as char(40));
end;
$$;"""
75
+
76
def initialize_database(conn: Engine | Connection, dialect: str = "mssql",
                        str_to_hash_count: int = 66,
                        verbose: bool = False,
                        drop_entities_table: bool = False) -> None:
    """Run the bootstrap DDL scripts for the given dialect.

    Scripts are executed in this order: extensions (postgres only), schemas,
    optional drop of ``core.entities``, the entities table, the string-to-hash
    functions, the execution/error log tables and the logging procedure.

    Args:
        conn: an ``Engine`` or an open ``Connection``. An engine is wrapped
            in ``engine.begin()`` so all scripts run on one connection and
            are committed together; a connection is used as given and its
            transaction is left to the caller.
        dialect: ``"mssql"`` or ``"postgres"``.
        str_to_hash_count: exclusive upper bound for the generated hash
            functions; variants 1 .. ``str_to_hash_count - 1`` are created.
        verbose: print the name of every script before executing it.
        drop_entities_table: drop ``core.entities`` before re-creating it.

    Raises:
        Dv2NotYetImplementedForDialectError: for any other dialect.
        ValueError: propagated from the hash DDL builders when
            ``str_to_hash_count`` asks for more than 100 parameters.
    """
    # Sample the clock once so "Created on" and "Updated on" cannot disagree.
    now = datetime.now()
    header = modeling_metadata.HEADER_TEMPLATE.format(
        created_on=now,
        updated_on=now,
        version=SANDWICH_VERSION,
        entity_name="SYSTEM")

    # dict insertion order is the execution order.
    init_scripts: dict[str, str] = {}
    if dialect == "mssql":
        init_scripts["create_schemas"] = ddl_mssql.create_schemas
        if drop_entities_table:
            init_scripts["drop_entities_table"] = "drop table if exists [core].[entities];"
        init_scripts["create_entities_table"] = ddl_mssql.create_entities_table
        init_scripts["create_func_StringToHash1"] = header + ddl_mssql.create_func_StringToHash
        for i in range(2, str_to_hash_count):
            init_scripts[f"create_func_StringToHash{i}"] = header + get_string_to_hash_ddl_mssql(i)
        init_scripts["create_table_ExecutionLog"] = ddl_mssql.create_table_ExecutionLog
        init_scripts["create_table_ErrorLog"] = ddl_mssql.create_table_ErrorLog
        init_scripts["create_proc_LogExecution"] = header + ddl_mssql.create_proc_LogExecution
    elif dialect == "postgres":
        # NOTE(review): unlike mssql, the postgres scripts are not prefixed
        # with the auto-generated header - confirm this is intentional.
        init_scripts["create_extensions"] = ddl_postgres.create_extensions
        init_scripts["create_schemas"] = ddl_postgres.create_schemas
        if drop_entities_table:
            init_scripts["drop_entities_table"] = "drop table if exists core.entities"
        init_scripts["create_entities_table"] = ddl_postgres.create_entities_table
        init_scripts["create_func_StringToHash1"] = ddl_postgres.create_func_StringToHash
        for i in range(2, str_to_hash_count):
            init_scripts[f"create_func_StringToHash{i}"] = get_string_to_hash_ddl_postgres(i)
        init_scripts["create_table_ExecutionLog"] = ddl_postgres.create_table_ExecutionLog
        init_scripts["create_table_ErrorLog"] = ddl_postgres.create_table_ErrorLog
        init_scripts["create_proc_LogExecution"] = ddl_postgres.create_proc_LogExecution
    else:
        raise err.Dv2NotYetImplementedForDialectError(dialect)

    def _run_all(connection: Connection) -> None:
        # Execute every collected script on one connection, in order.
        for name, script in init_scripts.items():
            if verbose:
                print(f"[ok] Executing script: {name}")
            connection.execute(text(script))

    if isinstance(conn, Engine):
        # SQLAlchemy 2.x engines have no ``execute``; open a transaction.
        with conn.begin() as connection:
            _run_all(connection)
    else:
        _run_all(conn)
117
+
118
def get_proc_definition_dml_mssql(proc_param_name: str) -> TextClause:
    """Build the query that fetches a stored procedure's source on MSSQL.

    Args:
        proc_param_name: name of the bind parameter the caller will supply;
            its value is compared with ``[schema].[procedure]``.

    Returns:
        A ``TextClause`` selecting ``sys.sql_modules.definition`` for
        objects of type ``'P'``.
    """
    # The parameter *name* is interpolated; the value stays a bind parameter.
    query = f"""
SELECT sm.definition
FROM sys.sql_modules sm
JOIN sys.objects o ON sm.object_id = o.object_id
JOIN sys.schemas s ON o.schema_id = s.schema_id
WHERE o.type = 'P'
AND '['+s.name+'].['+o.name+']' = :{proc_param_name}
"""
    return text(query)
127
+
128
def parse_auto_generated_header(full_proc_text: str) -> dict[str, Any]:
    """Extract metadata from the auto-generated ``/* ... */`` header.

    Scanning starts after the first line that is exactly ``/*`` (ignoring
    surrounding whitespace) and stops at the first line that is exactly
    ``*/``.

    Args:
        full_proc_text: full source text of a generated object.

    Returns:
        A dict with ``rows_in_header`` (lines between ``/*`` and ``*/``,
        0 when no header was found) and, when present, ``created_on`` /
        ``updated_on`` holding the text after the first ``:`` on the
        "Created on" / "Updated on" lines.
    """
    labels = {"Created on": "created_on", "Updated on": "updated_on"}
    result: dict[str, Any] = {}
    started = False
    rows_in_header = 0

    for ln in full_proc_text.splitlines():
        stripped = ln.strip()
        if not started:
            # Everything before the opening marker is ignored.
            started = stripped == "/*"
            continue

        rows_in_header += 1
        if stripped == "*/":
            break
        for label, key in labels.items():
            if stripped.startswith(label):
                _, colon, value = stripped.partition(":")
                # A label line without ':' carries no value; skip it rather
                # than failing with IndexError.
                if colon:
                    result[key] = value.strip()
                break

    # The closing "*/" line was counted above, hence the -1.
    result["rows_in_header"] = rows_in_header - 1 if rows_in_header > 0 else 0
    return result
@@ -0,0 +1,82 @@
1
+ from datetime import datetime
2
+
3
+ from sqlalchemy import Connection, Engine, MetaData, select, Table, text, RowMapping, Sequence
4
+
5
+ from sandwich.dialects import DialectHandlerFactory
6
+ from sandwich.modeling import get_stg_info, infer_template, Dv2SystemInfo, Dv2Entity, StgInfo
7
+ from sandwich.modeling.strategies import SchemaGenerator, StrategyFactory
8
+
9
+
10
def _register_entity(entity_name: str, template: str, conn: Engine | Connection,
                     verbose: bool = False) -> datetime:
    """Insert the entity into ``core.entities`` or refresh the existing row.

    Returns:
        The entity's ``created`` value: the stored one when the entity is
        already registered, otherwise the timestamp just inserted.
    """
    entities = Table("entities", MetaData(), schema="core", autoload_with=conn)
    lookup = select(entities.c.created).where(entities.c.entity_name == entity_name)
    created_on = conn.execute(lookup).scalar_one_or_none()

    if created_on is not None:
        # Already registered: only touch `updated` / `is_deleted`.
        _update_entity(entity_name, conn, entities, verbose=verbose)
        return created_on

    created_on = datetime.now()
    new_row = entities.insert().values(entity_name=entity_name, template=template, created=created_on)
    conn.execute(new_row)
    if verbose:
        print(f"[ok] Registered `{entity_name}` for `{template}`")
    return created_on
24
+
25
def _update_entity(entity_name: str, conn: Engine | Connection, sys_entities: Table, verbose: bool = False) -> None:
    """Stamp the entity's row with the current time and clear ``is_deleted``."""
    statement = (
        sys_entities.update()
        .where(sys_entities.c.entity_name == entity_name)
        .values(updated=datetime.now(), is_deleted=False)
    )
    conn.execute(statement)
    if verbose:
        print(f"[ok] Updated `{entity_name}`")
30
+
31
+
32
def generate_schema(schema_generator: SchemaGenerator, registered_on: datetime, conn: Engine | Connection,
                    verbose: bool = False) -> None:
    """Create the generator's tables, then execute its procedure scripts.

    Args:
        schema_generator: supplies ``make_tables()`` and ``make_procedures()``.
        registered_on: forwarded to ``make_procedures`` as the entity's
            registration date.
        conn: target for ``table.create`` and ``execute``.
        verbose: print one line per table and per procedure.
    """
    tables = schema_generator.make_tables()
    for table in tables.values():
        if table is None:
            continue
        # checkfirst=True: tables that already exist are left untouched.
        table.create(conn, checkfirst=True)
        if verbose:
            print(f"[ok] Created table [{table.schema}].[{table.name}]")

    procedures = schema_generator.make_procedures(tables, registered_on)
    for proc_code, proc_name, _ in procedures.values():
        conn.execute(text(proc_code))
        if verbose:
            print(f"[ok] Created or altered {proc_name}")
46
+
47
+
48
def _generate_schema_for_entity(stg_info: StgInfo, conn: Engine | Connection, dialect: str,
                                registered_on: datetime, template: str | None, verbose: bool = False) -> None:
    """Validate the staging object and generate its tables and procedures.

    Pipeline: validator for ``template`` -> staging validation against the
    current system info -> dialect handler -> schema generator ->
    ``generate_schema``.
    """
    validator = StrategyFactory.create_validator(template)
    validation_result = validator.validate_staging(stg_info, get_system_info(conn))
    schema_generator = StrategyFactory.create_generator(
        DialectHandlerFactory.create_handler(dialect),
        validation_result,
    )
    generate_schema(schema_generator, registered_on, conn, verbose=verbose)
56
+
57
+
58
def register_and_create_entity(entity_name: str, conn: Engine | Connection, dialect: str, template: str | None = None,
                               schema: str = "stg", verbose: bool = False) -> None:
    """Register a staging object as an entity and generate its schema.

    Args:
        entity_name: name of the staging table or view.
        conn: connection used for reflection and DDL.
        dialect: dialect name passed to the dialect handler factory.
        template: modeling template; inferred from the staging columns
            when ``None``.
        schema: schema that holds the staging object.
        verbose: print progress for registration and generation.
    """
    stg_info = get_stg_info(entity_name, schema, conn)
    if template is None:
        template = infer_template(stg_info)
    # Forward `verbose` so the registration step reports progress too.
    registered_on = _register_entity(entity_name, template, conn, verbose=verbose)
    _generate_schema_for_entity(stg_info, conn, dialect, registered_on, template, verbose=verbose)
65
+
66
+
67
def update_registered_entities(conn: Engine | Connection, dialect: str, schema: str = "stg",
                               verbose: bool = False) -> None:
    """Regenerate the schema of every entity returned by ``get_system_info``.

    For each entity the staging object is re-read from ``schema``, the
    registry row is refreshed, and tables/procedures are generated again
    with the entity's stored template and creation date.
    """
    system = get_system_info(conn)
    for entity in system.entities_list:
        name = entity.entity_name
        stg_info = get_stg_info(name, schema, conn)
        _update_entity(name, conn, system.sys_entities, verbose=verbose)
        _generate_schema_for_entity(
            stg_info, conn, dialect, entity.created_on, entity.template, verbose=verbose)
74
+
75
+
76
def get_system_info(conn: Engine | Connection):
    """Reflect ``core.entities`` and load the rows not flagged ``is_deleted``.

    Returns:
        ``Dv2SystemInfo`` holding one ``Dv2Entity`` per selected row plus
        the reflected table.
    """
    sys_entities = Table("entities", MetaData(), schema="core", autoload_with=conn)
    active_query = sys_entities.select().where(~sys_entities.c.is_deleted)
    rows = conn.execute(active_query).mappings().all()
    entities = [Dv2Entity(row["entity_name"], row["template"], row["created"]) for row in rows]
    return Dv2SystemInfo(entities, sys_entities)
sandwich/errors.py ADDED
@@ -0,0 +1,25 @@
1
+ from typing import Any
2
+
3
+ class _Dv2TemplatedError(Exception):
4
+ """Override `_template` with a string using `{value}` placeholder and optionally `{field}` placeholder.
5
+ Example: `_template = "User with {field}={value} not found"`
6
+ """
7
+
8
+ _template: str
9
+
10
+ def __init__(self, value: Any, field: str | None = None):
11
+ template = getattr(self, "_template", None)
12
+ if not template:
13
+ raise NotImplementedError("_template is not implemented")
14
+
15
+ if field:
16
+ message = template.format(field=field, value=value)
17
+ else:
18
+ message = template.format(value=value)
19
+
20
+ super().__init__(message)
21
+
22
class Dv2NotYetImplementedForDialectError(_Dv2TemplatedError):
    """Raised when a feature is requested for an unsupported dialect."""

    _template = "Not yet implemented for '{value}' dialect"

    def __init__(self, value: Any):
        # Narrow the base signature: this error takes the dialect name only.
        super().__init__(value)
sandwich/main.py ADDED
File without changes
@@ -0,0 +1,120 @@
1
+ from dataclasses import dataclass
2
+ from datetime import datetime
3
+ from typing import Any, Tuple
4
+
5
+ from sqlalchemy import Table, Engine, Connection, MetaData
6
+
7
@dataclass(frozen=True)
class Dv2Entity:
    """Immutable record describing one registered entity."""

    # Name of the entity.
    entity_name: str
    # Modeling template assigned to the entity.
    template: str
    # Timestamp recorded when the entity was created.
    created_on: datetime
12
+
13
+
14
@dataclass(frozen=True)
class StgInfo:
    """Immutable description of a staging object's columns, grouped by role."""

    # Name and schema of the staging object.
    stg_name: str
    stg_schema: str
    # Column name -> column type, one mapping per column role.
    hk_keys: dict[str, Any]
    bk_keys: dict[str, Any]
    sys_columns: dict[str, Any]
    bus_columns: dict[str, Any]
    # Optional (name, type) pair of the degenerate field.
    degenerate_field: Tuple[str, Any] | None = None
23
+
24
@dataclass(frozen=True)
class Dv2SystemInfo:
    """Immutable pair: the known entities and the table they were read from."""

    # Entities known to the system.
    entities_list: list[Dv2Entity]
    # Table object for the entity registry.
    sys_entities: Table
28
+
29
class Dv2ModelingMetadata:
    """Shared modeling constants: header template and system column names."""

    # Comment header for generated SQL objects. Format fields:
    # created_on / updated_on (datetime), version, entity_name.
    HEADER_TEMPLATE = """/*
=====================================================================
AUTO-GENERATED CODE — DO NOT EDIT MANUALLY
=====================================================================

This stored procedure was automatically generated by:
sandwich (https://pypi.org/project/sandwich/)

Manual changes are discouraged.
This file may be regenerated at any time, and all manual edits
will be overwritten.

Created on : {created_on:%Y-%m-%d %H:%M:%S}
Updated on : {updated_on:%Y-%m-%d %H:%M:%S}
Generator : sandwich v{version}
Entity name : {entity_name}

=====================================================================
*/
"""

    # Names of the system (non-business) columns.
    loaddate = "LoadDate"
    recordsource = "RecordSource"
    hashdiff = "HashDiff"
    is_available = "IsAvailable"
    names: list[str] = [loaddate, recordsource, hashdiff, is_available]

    # Per-dialect column types sketched for a future `_dialects_config`
    # (currently disabled, nothing enforces them):
    #   mssql:    LoadDate DATETIME2, RecordSource VARCHAR(200),
    #             HashDiff CHAR(40),  IsAvailable BIT
    #   postgres: LoadDate TIMESTAMP, RecordSource TEXT,
    #             HashDiff CHAR(40),  IsAvailable BOOLEAN

    def __init__(self):
        # Dialect-specific setup (self.dialect / self.column_types) is disabled.
        # System columns that are listed as required.
        self.required_columns: list[str] = [self.loaddate, self.recordsource]


# Module-level shared instance.
modeling_metadata = Dv2ModelingMetadata()
76
+
77
def get_stg_info(entity_name: str, schema: str, conn: Engine | Connection) -> StgInfo:
    """Reflect a staging object and classify its columns by naming rule.

    Classification, checked in this order:
      * ``hk_`` prefix -> hash key
      * ``bk_`` prefix -> business key
      * ``dg_`` prefix -> degenerate field (transactional links only); when
        several columns match, the last one is kept
      * ``sg_`` prefix -> surrogate key, not supported
      * name listed in ``modeling_metadata.names`` -> system column
      * anything else -> business column

    Raises:
        Exception: when an ``sg_`` column is found.
    """
    stg = Table(entity_name, MetaData(), schema=schema, autoload_with=conn)

    hk_keys: dict[str, Any] = {}
    bk_keys: dict[str, Any] = {}
    sys_columns: dict[str, Any] = {}
    bus_columns: dict[str, Any] = {}
    degenerate_field: Tuple[str, Any] | None = None

    # Key prefixes that simply collect into a dict.
    key_buckets = {"hk_": hk_keys, "bk_": bk_keys}

    for column in stg.columns.values():
        name, col_type = column.name, column.type
        prefix = name[:3]
        if prefix in key_buckets:
            key_buckets[prefix][name] = col_type
        elif prefix == "dg_":
            degenerate_field = (name, col_type)
        elif prefix == "sg_":
            raise Exception(f"sg column '{name}' is not implemented yet")
        elif name in modeling_metadata.names:
            # System column types are not checked against the dialect yet.
            sys_columns[name] = col_type
        else:
            bus_columns[name] = col_type

    return StgInfo(
        stg_name=stg.name,
        stg_schema=stg.schema,
        hk_keys=hk_keys,
        bk_keys=bk_keys,
        sys_columns=sys_columns,
        bus_columns=bus_columns,
        degenerate_field=degenerate_field,
    )
112
+
113
def infer_template(stg_info: StgInfo):
    """Pick a template name from the number of hash-key columns.

    Returns:
        ``"scd2dim"`` for exactly one hash key, ``"link2fact"`` for more.

    Raises:
        Exception: when there is no hash-key column at all.
    """
    hk_count = len(stg_info.hk_keys)
    if hk_count == 0:
        raise Exception("hk column is required for `scd2dim` validation")
    return "link2fact" if hk_count > 1 else "scd2dim"
@@ -0,0 +1,15 @@
1
+ from .base import SchemaGenerator, Validator
2
+ from .factory import StrategyFactory
3
+ from .link2fact import Link2FactValidator, Link2FactSchemaGenerator
4
+ from .scd2dim import Scd2DimValidator, Scd2DimSchemaGenerator
5
+
6
# Names exported by `from <this package> import *`: the two abstract
# interfaces, the factory, and the validator/generator pair of each strategy.
__all__ = [
    "Validator",
    "SchemaGenerator",
    "StrategyFactory",
    "Scd2DimValidator",
    "Scd2DimSchemaGenerator",
    "Link2FactValidator",
    "Link2FactSchemaGenerator",
]
15
+
@@ -0,0 +1,94 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
3
+ from datetime import datetime
4
+ from typing import Any, Callable, Tuple
5
+
6
+ from sqlalchemy import Table
7
+
8
+ from sandwich.modeling import Dv2SystemInfo, modeling_metadata, StgInfo
9
+
10
@dataclass(frozen=True)
class ValidationResult:
    """Immutable outcome of staging validation, consumed by schema generators."""

    # Schema and name of the validated staging object.
    stg_schema: str
    entity_name: str
    # (column name, column type) pairs.
    bk_keys: list[Tuple[str, Any]]
    hk_keys: list[Tuple[str, Any]]
    # Column name -> column type.
    business_column_types: dict[str, Any]
    system_column_types: dict[str, Any]
    # Template the validator was created for.
    template: str
    # Optional (name, type) pair of the degenerate field.
    degenerate_field: Tuple[str, Any] | None = None
20
+
21
class Validator(ABC):
    """Interface for staging validators."""

    @abstractmethod
    def validate_staging(self, stg_info: StgInfo, sys_info: Dv2SystemInfo, verbose: bool = False) -> ValidationResult:
        """Validate ``stg_info`` against ``sys_info`` and return the result."""
25
+
26
class BaseValidator(Validator):
    """Validator with the checks shared by all templates.

    Template-specific validators can assign ``_on_validate_staging`` to run
    additional checks after the shared ones.
    """

    def __init__(self, template: str):
        self.template = template
        # Optional extra check; called with (stg_info, sys_info).
        self._on_validate_staging: Callable[[StgInfo, Dv2SystemInfo], None] | None = None

    def validate_staging(self, stg_info: StgInfo, sys_info: Dv2SystemInfo, verbose: bool = False) -> ValidationResult:
        """Validate a staging table or view and package its column info.

        Raises:
            Exception: when ``verbose`` is requested (not supported yet) or
                a required system column is missing. The optional hook may
                raise as well.
        """
        if verbose:
            raise Exception("verbose is not implemented yet")

        # universal check - all dv2 raw objects should be auditable
        present = stg_info.sys_columns.keys()
        first_missing = next(
            (col for col in modeling_metadata.required_columns if col not in present),
            None,
        )
        if first_missing is not None:
            raise Exception(f"{first_missing} column is required")

        hook = self._on_validate_staging
        if hook is not None:
            hook(stg_info, sys_info)

        # todo: ValidationResult is not required whatsoever
        return ValidationResult(
            stg_schema=stg_info.stg_schema,
            entity_name=stg_info.stg_name,
            bk_keys=list(stg_info.bk_keys.items()),
            hk_keys=list(stg_info.hk_keys.items()),
            degenerate_field=stg_info.degenerate_field,
            business_column_types=stg_info.bus_columns,
            system_column_types=stg_info.sys_columns,
            template=self.template,
        )
59
+
60
+
61
+
62
class SchemaGenerator(ABC):
    """Interface for objects that produce the tables and procedures of an entity."""

    @abstractmethod
    def make_tables(self) -> dict[str, Table]:
        """Return the tables to create, keyed by table type."""

    @abstractmethod
    def make_procedures(
        self,
        tables: dict[str, Table],
        entity_registration_date: datetime | None = None
    ) -> dict[str, Tuple[str, str, str]]:
        """Return the procedures to create, keyed by procedure type.

        Args:
            tables: the mapping returned by ``make_tables``.
            entity_registration_date: when the entity was registered.
                Defaults to ``None`` instead of ``datetime.now()``: a call in
                the signature is evaluated once, when the class is defined,
                and would freeze the import time as the default.
                Implementations should substitute the current time for
                ``None``.

        Returns:
            A 3-tuple of strings per procedure; callers in this package
            unpack it as ``(code, name, _)``.
        """
74
+
75
+ # class BaseSchemaGenerator(SchemaGenerator):
76
+ # def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
77
+ # self.dialect_handler = dialect_handler
78
+ # self._validation_result = validation_result
79
+ # self._on_make_proc: Callable[[Table, datetime], Tuple[str, str, str]] | None = None
80
+ #
81
+ # def make_proc(self, tbl: Table, entity_registration_date: datetime) -> Tuple[str, str, str]:
82
+ # header = modeling_metadata.HEADER_TEMPLATE.format(
83
+ # created_on=entity_registration_date,
84
+ # updated_on=datetime.now(),
85
+ # version=SANDWICH_VERSION,
86
+ # entity_name=self._validation_result.entity_name
87
+ # )
88
+ #
89
+ # if self._validation_result.stg_schema == "proxy":
90
+ # stg_proc_code, stg_proc_name, stg_call_stmt = self.dialect_handler.make_stg_materialization_proc(
91
+ # entity_name=self._validation_result.entity_name,
92
+ # header=header
93
+ # )
94
+ # procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)
@@ -0,0 +1,39 @@
1
+ from src.sandwich.dialects import DialectHandler
2
+
3
+ from .base import Validator, SchemaGenerator, ValidationResult
4
+ from .link2fact import Link2FactValidator, Link2FactSchemaGenerator
5
+ from .scd2dim import Scd2DimValidator, Scd2DimSchemaGenerator
6
+
7
+
8
class StrategyFactory:
    """Registry mapping a template name to its (validator, generator) classes."""

    _strategies = {
        "scd2dim": (Scd2DimValidator, Scd2DimSchemaGenerator),
        "link2fact": (Link2FactValidator, Link2FactSchemaGenerator),
    }

    @classmethod
    def _resolve(cls, template: str):
        """Return the registered (validator, generator) pair or raise ValueError."""
        if template not in cls._strategies:
            available = ", ".join(cls._strategies.keys())
            raise ValueError(f"Unknown template '{template}'. Available templates: {available}")
        return cls._strategies[template]

    @classmethod
    def register_strategy(cls, template_name: str, validator_class, generator_class):
        """Add or replace the strategy registered under ``template_name``."""
        cls._strategies[template_name] = (validator_class, generator_class)

    @classmethod
    def create_validator(cls, template: str) -> Validator:
        """Instantiate the validator registered for ``template``.

        Raises:
            ValueError: when the template is not registered.
        """
        validator_class = cls._resolve(template)[0]
        return validator_class(template)

    @classmethod
    def create_generator(cls, dialect_handler: DialectHandler, validation_result: ValidationResult) -> SchemaGenerator:
        """Instantiate the generator for ``validation_result.template``.

        Raises:
            ValueError: when the template is not registered.
        """
        generator_class = cls._resolve(validation_result.template)[1]
        return generator_class(dialect_handler, validation_result)

    @classmethod
    def get_available_templates(cls) -> list[str]:
        """Return the registered template names."""
        return list(cls._strategies.keys())