sandwich 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
+ from dataclasses import dataclass
2
+ from typing import Any
3
+
4
+ from sqlalchemy import Table
5
+
6
@dataclass(frozen=True)
class StgInfo:
    """Immutable description of a staging table with its columns grouped by role.

    Every mapping is keyed by column name. `get_stg_info` fills the values with
    the `type` attribute of the corresponding SQLAlchemy column.
    """

    # Name of the staging table (`Table.name`).
    stg_name: str
    # Schema of the staging table (`Table.schema`).
    stg_schema: str
    # Columns whose name starts with `hk_`.
    hk_keys: dict[str, Any]
    # Columns whose name starts with `bk_`.
    bk_keys: dict[str, Any]
    # Columns whose name is listed in `modeling_metadata.names`.
    sys_columns: dict[str, Any]
    # Every remaining column.
    bus_columns: dict[str, Any]
14
+
15
class Dv2ModelingMetadata:
    """Shared naming conventions and the banner placed at the top of generated code."""

    # Banner rendered with `str.format`; it expects `created_on` and `updated_on`
    # (formatted with a strftime-style spec), `version` and `entity_name`.
    HEADER_TEMPLATE = """/*
=====================================================================
AUTO-GENERATED CODE — DO NOT EDIT MANUALLY
=====================================================================

This stored procedure was automatically generated by:
sandwich (https://pypi.org/project/sandwich/)

Manual changes are discouraged.
This file may be regenerated at any time, and all manual edits
will be overwritten.

Created on : {created_on:%Y-%m-%d %H:%M:%S}
Updated on : {updated_on:%Y-%m-%d %H:%M:%S}
Generator : sandwich v{version}
Entity name : {entity_name}

=====================================================================
*/
"""
    # Names of the system (non-business) columns recognised in staging tables.
    loaddate = "LoadDate"
    recordsource = "RecordSource"
    hashdiff = "HashDiff"
    is_available = "IsAvailable"
    # All system column names; `get_stg_info` uses this list to classify columns.
    names: list[str] = [loaddate, recordsource, hashdiff, is_available]
    # Disabled per-dialect type expectations, kept for reference.
    # _dialects_config: dict[str, dict[str, str]] = {
    #     "mssql": {
    #         loaddate: "DATETIME2",
    #         recordsource: "VARCHAR(200)",
    #         hashdiff: "CHAR(40)",
    #         is_available: "BIT",
    #     },
    #     "postgres": {
    #         loaddate: "TIMESTAMP",
    #         recordsource: "TEXT",
    #         hashdiff: "CHAR(40)",
    #         is_available: "BOOLEAN",
    #     },
    # }
    def __init__(self):
        #self.dialect = dialect

        #self.column_types = self._dialects_config[self.dialect]
        # System columns that the scd2dim validator requires on every staging object.
        self.required_columns: list[str] = [self.loaddate, self.recordsource]


# Module-level shared instance used by the staging inspection and strategy code.
modeling_metadata = Dv2ModelingMetadata()
63
+
64
def get_stg_info(stg: Table) -> StgInfo:
    """Group the columns of a staging table by their role.

    Columns are classified by name: an `hk_` prefix marks a hash key, a `bk_`
    prefix marks a business key, names listed in `modeling_metadata.names` are
    system columns, and everything else is a business column.

    Args:
        stg: The staging table to inspect.

    Returns:
        A `StgInfo` holding the table name, schema and the four column groups,
        each mapping a column name to the column's type.

    Raises:
        Exception: If a column uses the `sg_` or `ts_` prefix, which is not
            implemented yet.
    """
    hash_keys: dict[str, Any] = {}
    business_keys: dict[str, Any] = {}
    system_cols: dict[str, Any] = {}
    business_cols: dict[str, Any] = {}

    for column in stg.columns.values():
        name = column.name
        if name.startswith("hk_"):
            hash_keys[name] = column.type
            continue
        if name.startswith("bk_"):
            business_keys[name] = column.type
            continue
        # Reserved prefixes: fail loudly instead of treating them as business data.
        if name.startswith("sg_"):
            raise Exception(f"sg column '{name}' is not implemented yet")
        if name.startswith("ts_"):
            raise Exception(f"ts column '{name}' is not implemented yet")

        bucket = system_cols if name in modeling_metadata.names else business_cols
        bucket[name] = column.type

    return StgInfo(
        stg_name=stg.name,
        stg_schema=stg.schema,
        hk_keys=hash_keys,
        bk_keys=business_keys,
        sys_columns=system_cols,
        bus_columns=business_cols,
    )
95
+
96
def infer_template(stg_info: StgInfo) -> str:
    """Choose the modeling template from the number of hash-key columns.

    Args:
        stg_info: Classified staging columns; only `hk_keys` is inspected.

    Returns:
        `"scd2dim"` when there is exactly one hash key, `"link2fact"` when
        there is more than one.

    Raises:
        ValueError: If the staging object has no hash-key column, so no
            template can be inferred.
    """
    hk_count = len(stg_info.hk_keys)
    if hk_count == 0:
        # No template has been selected at this point, so the message must not
        # claim that a particular template's validation failed.
        raise ValueError(
            "hk column is required to infer a template: "
            "no `hk_` column found in the staging object"
        )
    return "scd2dim" if hk_count == 1 else "link2fact"
@@ -0,0 +1,15 @@
1
+ from .base import SchemaGenerator, Validator
2
+ from .factory import StrategyFactory
3
+ from .link2fact import Link2FactValidator, Link2FactSchemaGenerator
4
+ from .scd2dim import Scd2DimValidator, Scd2DimSchemaGenerator
5
+
6
# Public API of this package: the abstract interfaces, the factory, and the
# concrete validator/generator pair of each template.
__all__ = [
    "Validator",
    "SchemaGenerator",
    "StrategyFactory",
    "Scd2DimValidator",
    "Scd2DimSchemaGenerator",
    "Link2FactValidator",
    "Link2FactSchemaGenerator",
]
15
+
@@ -0,0 +1,44 @@
1
+ from abc import ABC, abstractmethod
2
+ from datetime import datetime
3
+ from typing import Any, Tuple
4
+
5
+ from sqlalchemy import Table
6
+
7
+ from src.sandwich.modeling import StgInfo
8
+
9
class ValidationResult:
    """Plain container for the outcome of validating a staging object.

    Attributes:
        stg_schema: Schema of the validated staging object.
        entity_name: Name of the entity.
        bk_keys: Business keys as `(name, type)` pairs.
        hk_key: The hash key as a `(name, type)` pair.
        business_column_types: Business column names mapped to their types.
        system_column_types: System column names mapped to their types.
    """

    def __init__(
        self,
        stg_schema: str,
        entity_name: str,
        bk_keys: list[Tuple[str, Any]],
        hk_key: Tuple[str, Any],
        business_column_types: dict[str, Any],
        system_column_types: dict[str, Any],
    ):
        # Assignment order is kept so `vars(result)` lists attributes in
        # declaration order.
        self.stg_schema, self.entity_name = stg_schema, entity_name
        self.bk_keys, self.hk_key = bk_keys, hk_key
        self.business_column_types = business_column_types
        self.system_column_types = system_column_types
21
+
22
class Validator(ABC):
    """Interface implemented by every template-specific staging validator."""

    @abstractmethod
    def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> ValidationResult:
        """Validate `stg_info` for a template and return the validated description."""
        ...
26
+
27
+
28
class SchemaGenerator(ABC):
    """Interface implemented by every template-specific schema generator."""

    @property
    @abstractmethod
    def entity_info(self) -> ValidationResult:
        """The validation result this generator works from."""
        ...

    @abstractmethod
    def make_tables(self) -> dict[str, Table]:
        """Build the target tables, keyed by a short role name."""
        ...

    @abstractmethod
    def make_procedures(
        self,
        tables: dict[str, Table],
        entity_registration_date: datetime | None = None,
    ) -> dict[str, Tuple[str, str, str]]:
        """Generate the procedures for the given tables.

        Args:
            tables: The tables produced by `make_tables`.
            entity_registration_date: When the entity was first registered.
                `None` means "now". The default is a sentinel rather than
                `datetime.now()` because a default expression is evaluated
                only once, when the function is defined, which would freeze
                the timestamp at import time.

        Returns:
            A mapping from a short role name to a 3-tuple of strings.
        """
        ...
44
+
@@ -0,0 +1,38 @@
1
+ from src.sandwich.dialects import DialectHandler
2
+
3
+ from .base import Validator, SchemaGenerator, ValidationResult
4
+ from .link2fact import Link2FactValidator, Link2FactSchemaGenerator
5
+ from .scd2dim import Scd2DimValidator, Scd2DimSchemaGenerator
6
+
7
+
8
class StrategyFactory:
    """Registry that maps a template name to its (validator, generator) classes."""

    _strategies = {
        "scd2dim": (Scd2DimValidator, Scd2DimSchemaGenerator),
        "link2fact": (Link2FactValidator, Link2FactSchemaGenerator),
    }

    @classmethod
    def _resolve(cls, template: str):
        """Return the registered class pair for `template` or raise ValueError."""
        registry = cls._strategies
        if template in registry:
            return registry[template]
        known = ", ".join(registry)
        raise ValueError(f"Unknown template '{template}'. Available templates: {known}")

    @classmethod
    def register_strategy(cls, template_name: str, validator_class, generator_class):
        """Add or replace the class pair registered under `template_name`."""
        cls._strategies[template_name] = (validator_class, generator_class)

    @classmethod
    def create_validator(cls, template: str) -> Validator:
        """Instantiate the validator registered for `template`.

        Raises:
            ValueError: If `template` is not registered.
        """
        validator_cls, _unused = cls._resolve(template)
        return validator_cls()

    @classmethod
    def create_generator(cls, template: str, dialect_handler: DialectHandler, validation_result: ValidationResult) -> SchemaGenerator:
        """Instantiate the generator registered for `template`.

        Raises:
            ValueError: If `template` is not registered.
        """
        _unused, generator_cls = cls._resolve(template)
        return generator_cls(dialect_handler, validation_result)

    @classmethod
    def get_available_templates(cls) -> list[str]:
        """Return the registered template names in registration order."""
        return [*cls._strategies]
@@ -0,0 +1,91 @@
1
+ """Link to Fact strategy implementations."""
2
from datetime import datetime
from typing import Tuple

from sqlalchemy import Table

from src.sandwich.dialects.base import DialectHandler
from src.sandwich.modeling import StgInfo

from .base import Validator, SchemaGenerator, ValidationResult
10
+
11
+
12
class Link2FactValidator(Validator):
    """Validator for the `link2fact` template (placeholder implementation)."""

    def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> ValidationResult:
        """Validate staging info for link2fact mode.

        No link2fact-specific rule is enforced yet; the classified staging
        columns are passed through unchanged.

        Args:
            stg_info: Classified staging columns.
            verbose: Not supported yet.

        Returns:
            A `ValidationResult` built from `stg_info`.

        Raises:
            Exception: If `verbose` is true.
        """
        if verbose:
            raise Exception("verbose is not implemented yet")

        # TODO: Implement link2fact specific validation logic
        # This will likely be different from scd2dim validation
        # For example: checking for link keys, fact columns, etc.

        return ValidationResult(
            stg_schema=stg_info.stg_schema,
            entity_name=stg_info.stg_name,
            bk_keys=list(stg_info.bk_keys.items()),
            # TODO: ValidationResult has a single hash-key slot and no fields
            # for link keys or fact columns; until it is extended for
            # link2fact, the hash key stays an unset placeholder.
            hk_key=None,
            business_column_types=stg_info.bus_columns,
            system_column_types=stg_info.sys_columns,
        )
45
+
46
+
47
class Link2FactSchemaGenerator(SchemaGenerator):
    """Schema generator for the `link2fact` template (placeholder implementation)."""

    def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
        self.dialect_handler = dialect_handler
        self._validation_result = validation_result

    @property
    def entity_info(self) -> ValidationResult:
        """The validation result this generator was created with."""
        return self._validation_result

    def make_tables(self) -> dict[str, Table]:
        """Create link and fact tables for link2fact mode.

        Returns:
            A mapping with the keys `"link"` and `"fact"`. Both values are
            `None` placeholders until table creation is implemented.
        """
        # TODO: Implement link2fact table creation
        # This will create different table structures than scd2dim
        # For example: link table, fact table (instead of hub/sat/dim)

        # Placeholder - actual implementation needed
        link_table: Table | None = None
        fact_table: Table | None = None

        return {
            "link": link_table,
            "fact": fact_table,
        }

    def make_procedures(
        self,
        tables: dict[str, Table],
        entity_registration_date: datetime | None = None,
    ) -> dict[str, Tuple[str, str, str]]:
        """Generate procedures for link2fact mode.

        The signature mirrors `SchemaGenerator.make_procedures`:
        `entity_registration_date` is optional and each value is a 3-tuple.

        Returns:
            An empty mapping until procedure generation is implemented.
        """
        procedures: dict[str, Tuple[str, str, str]] = {}

        # TODO: Implement link2fact procedure generation using dialect_handler
        # This will generate different procedures than scd2dim
        # For example: link population, fact population, aggregation logic, etc.

        # When implementing, use self.dialect_handler methods to generate SQL
        # Example:
        # link_proc_code, link_proc_name, link_call_stmt = self.dialect_handler.make_link_proc(...)
        # procedures["link"] = (link_proc_code, link_proc_name, link_call_stmt)

        return procedures
@@ -0,0 +1,246 @@
1
+ from datetime import datetime
2
+ from typing import Iterator, Tuple
3
+
4
+ from sqlalchemy import Column, MetaData, Table, UniqueConstraint
5
+
6
+ from src.sandwich import SANDWICH_VERSION
7
+ from src.sandwich.dialects.base import DialectHandler
8
+ from src.sandwich.modeling import modeling_metadata, StgInfo
9
+
10
+ from .base import Validator, SchemaGenerator, ValidationResult
11
+
12
+
13
class Scd2DimValidator(Validator):
    """Validator for the `scd2dim` template."""

    def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> ValidationResult:
        """Validate staging table or view for `scd2dim` template.

        Checks run in this order: exactly one hash key named
        `hk_<staging name>`, at least one business key, the universally
        required audit columns, then the scd2dim-specific system columns.

        Raises: Exception"""
        if verbose:
            raise Exception("verbose is not implemented yet")

        # --- hash key -------------------------------------------------------
        # Exactly one hash key is allowed for the `scd2dim` profile and its
        # name must match the `hk_[entity_name]` pattern.
        hash_keys = list(stg_info.hk_keys.items())
        if not hash_keys:
            raise Exception("hk column is required for `scd2dim` validation")
        if len(hash_keys) > 1:
            raise Exception(f"More than one hk column found in stg.{stg_info.stg_name}")
        hk_key = hash_keys[0]  # (key_name, key_type)
        hk_name = hk_key[0]
        if hk_name != f"hk_{stg_info.stg_name}":
            raise Exception(f"hk column has invalid name '{hk_name}'")

        # --- business keys --------------------------------------------------
        # A hub and/or dim table only makes sense for a business entity, so at
        # least one business key is needed; more are allowed. The convention is
        # a `bk_` prefix on the original key name, which keeps that name visible.
        if not stg_info.bk_keys:
            raise Exception("bk column(s) are required for `scd2dim` validation")

        present_system_columns = stg_info.sys_columns.keys()

        # Universal check: every dv2 raw object must be auditable.
        for required_col in modeling_metadata.required_columns:
            if required_col not in present_system_columns:
                raise Exception(f"{required_col} column is required")

        # scd2dim-specific system columns, checked in this order.
        for specific_col in (modeling_metadata.hashdiff, modeling_metadata.is_available):
            if specific_col not in present_system_columns:
                raise Exception(f"{specific_col} column is required for scd2dim validation")

        return ValidationResult(
            stg_schema=stg_info.stg_schema,
            entity_name=stg_info.stg_name,
            bk_keys=list(stg_info.bk_keys.items()),
            hk_key=hk_key,
            business_column_types=stg_info.bus_columns,
            system_column_types=stg_info.sys_columns,
        )
69
+
70
class Scd2DimSchemaGenerator(SchemaGenerator):
    """Builds the hub/sat/dim tables and their procedures for a validated `scd2dim` entity."""

    def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
        self.dialect_handler = dialect_handler
        self._validation_result = validation_result

    @property
    def entity_info(self) -> ValidationResult:
        """The validation result this generator was created with."""
        return self._validation_result

    def make_tables(self) -> dict[str, Table]:
        """Build the hub, sat and dim table definitions for the entity.

        Each table is named after the entity, gets its own `MetaData`, and is
        placed in the `hub`, `sat` or `dim` schema respectively. Column types
        are taken from the validation result.

        Returns:
            A mapping with the keys `"hub"`, `"sat"` and `"dim"`.
        """
        info = self._validation_result
        entity_name = info.entity_name
        bk_keys = info.bk_keys
        hk_key = info.hk_key
        business_column_types = info.business_column_types
        system_column_types = info.system_column_types

        # Column factories. A Column object is bound to the table it is
        # appended to, so every table needs fresh instances.
        def bk_columns(**flags) -> Iterator[Column]:
            return (Column(bk_key[0], bk_key[1], **flags) for bk_key in bk_keys)

        def hk_pk_column() -> Column:
            return Column(hk_key[0], hk_key[1], primary_key=True)

        def system_column(name: str, **flags) -> Column:
            return Column(name, system_column_types[name], **flags)

        def loaddate_typed_column(name: str, **flags) -> Column:
            # DateFrom/DateTo reuse the type of the LoadDate column.
            return Column(name, system_column_types[modeling_metadata.loaddate], **flags)

        def business_columns() -> Iterator[Column]:
            return (
                Column(col_name, col_type, nullable=True)
                for col_name, col_type in business_column_types.items()
            )

        # Hub: business keys, hash key as PK, audit columns, unique business keys.
        hub_table = Table(entity_name, MetaData(), schema="hub")
        for bk_col in bk_columns(nullable=False):
            hub_table.append_column(bk_col)
        hub_table.append_column(hk_pk_column())
        hub_table.append_column(system_column(modeling_metadata.loaddate, nullable=False))
        hub_table.append_column(system_column(modeling_metadata.recordsource, nullable=False))
        hub_table.append_constraint(UniqueConstraint(*[bk[0] for bk in bk_keys]))

        # Sat: primary key is (hash key, LoadDate).
        sat_table = Table(entity_name, MetaData(), schema="sat")
        for bk_col in bk_columns(nullable=False):
            sat_table.append_column(bk_col)
        sat_table.append_column(hk_pk_column())
        sat_table.append_column(system_column(modeling_metadata.loaddate, primary_key=True))
        sat_table.append_column(system_column(modeling_metadata.recordsource, nullable=False))
        sat_table.append_column(system_column(modeling_metadata.hashdiff, nullable=False))
        for business_col in business_columns():
            sat_table.append_column(business_col)
        sat_table.append_column(system_column(modeling_metadata.is_available, nullable=False))

        # Dim: primary key is (business keys, DateFrom).
        dim_table = Table(entity_name, MetaData(), schema="dim")
        for bk_col in bk_columns(primary_key=True):
            dim_table.append_column(bk_col)
        for business_col in business_columns():
            dim_table.append_column(business_col)
        dim_table.append_column(system_column(modeling_metadata.is_available, nullable=False))
        dim_table.append_column(Column("IsCurrent", self.dialect_handler.get_boolean_type(), nullable=False))
        dim_table.append_column(loaddate_typed_column("DateFrom", primary_key=True))
        dim_table.append_column(loaddate_typed_column("DateTo", nullable=True))

        return {
            "hub": hub_table,
            "sat": sat_table,
            "dim": dim_table,
        }

    def make_procedures(
        self,
        tables: dict[str, Table],
        entity_registration_date: datetime | None = None,
    ) -> dict[str, Tuple[str, str, str]]:
        """Generate the procedures for the entity.

        Args:
            tables: Must contain the `"hub"`, `"sat"` and `"dim"` tables.
            entity_registration_date: Value for the "Created on" line of the
                header. `None` means "now". The default is a sentinel rather
                than `datetime.now()` because a default expression is
                evaluated only once, at definition time, which would stamp
                every header with the import time of this module.

        Returns:
            A mapping from `"hub"`, `"sat"`, `"dim"`, `"job"` and `"drop"`
            (plus `"stg"` when the staging schema is `"proxy"`) to a
            `(procedure code, procedure name, call statement)` tuple.
        """
        now = datetime.now()
        if entity_registration_date is None:
            entity_registration_date = now

        result = self._validation_result
        procedures = {}

        header = modeling_metadata.HEADER_TEMPLATE.format(
            created_on=entity_registration_date,
            updated_on=now,
            version=SANDWICH_VERSION,
            entity_name=result.entity_name,
        )

        # A staging materialization procedure is generated only for the
        # "proxy" staging schema; otherwise its name stays None.
        stg_proc_name = None
        if result.stg_schema == "proxy":
            stg_proc_code, stg_proc_name, stg_call_stmt = self.dialect_handler.make_stg_materialization_proc(
                entity_name=result.entity_name,
                header=header,
            )
            procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)

        # Generate hub procedure
        hub_proc_code, hub_proc_name, hub_call_stmt = self.dialect_handler.make_hub_proc(
            hub_table=tables["hub"],
            bk_keys=result.bk_keys,
            header=header,
        )
        procedures["hub"] = (hub_proc_code, hub_proc_name, hub_call_stmt)

        # Generate sat procedure
        sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_sat_proc(
            sat_table=tables["sat"],
            hk_name=result.hk_key[0],
            hashdiff_col=modeling_metadata.hashdiff,
            is_available_col=modeling_metadata.is_available,
            loaddate_col=modeling_metadata.loaddate,
            stg_schema=result.stg_schema,
            header=header,
        )
        procedures["sat"] = (sat_proc_code, sat_proc_name, sat_call_stmt)

        # Generate dim procedure
        dim_proc_code, dim_proc_name, dim_call_stmt = self.dialect_handler.make_dim_scd2_proc(
            dim_table=tables["dim"],
            bk_keys=result.bk_keys,
            header=header,
        )
        procedures["dim"] = (dim_proc_code, dim_proc_name, dim_call_stmt)

        # Generate job procedure; it receives the names of the procedures above.
        job_proc_code, job_proc_name, job_call_stmt = self.dialect_handler.make_job_proc(
            entity_name=result.entity_name,
            hub_proc_name=hub_proc_name,
            sat_proc_name=sat_proc_name,
            dim_proc_name=dim_proc_name,
            stg_proc_name=stg_proc_name,
            header=header,
        )
        procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)

        # Generate drop procedure
        drop_proc_code, drop_proc_name, drop_call_stmt = self.dialect_handler.make_drop_proc(
            entity_name=result.entity_name,
            stg_schema=result.stg_schema,
            job_proc_name=job_proc_name,
            stg_proc_name=stg_proc_name,
            hub_proc_name=hub_proc_name,
            sat_proc_name=sat_proc_name,
            dim_proc_name=dim_proc_name,
            header=header,
        )
        procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)

        return procedures