sandwich 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sandwich/__init__.py +3 -0
- sandwich/dialects/__init__.py +12 -0
- sandwich/dialects/base.py +166 -0
- sandwich/dialects/ddl_mssql.py +148 -0
- sandwich/dialects/ddl_postgres.py +132 -0
- sandwich/dialects/factory.py +27 -0
- sandwich/dialects/mssql.py +271 -0
- sandwich/dialects/postgres.py +108 -0
- sandwich/dialects/utils.py +149 -0
- sandwich/dv2_helper.py +98 -0
- sandwich/errors.py +25 -0
- sandwich/main.py +0 -0
- sandwich/modeling/__init__.py +103 -0
- sandwich/strategies/__init__.py +15 -0
- sandwich/strategies/base.py +44 -0
- sandwich/strategies/factory.py +38 -0
- sandwich/strategies/link2fact.py +91 -0
- sandwich/strategies/scd2dim.py +246 -0
- {sandwich-0.2.1.dist-info → sandwich-0.2.2.dist-info}/METADATA +167 -154
- sandwich-0.2.2.dist-info/RECORD +23 -0
- sandwich-0.2.2.dist-info/WHEEL +4 -0
- sandwich-0.2.2.dist-info/entry_points.txt +3 -0
- sandwich-0.2.1.dist-info/RECORD +0 -5
- sandwich-0.2.1.dist-info/WHEEL +0 -4
- sandwich-0.2.1.dist-info/licenses/LICENSE +0 -9
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import Table
|
|
5
|
+
|
|
6
|
+
@dataclass(frozen=True)
class StgInfo:
    """Immutable description of one staging object, with columns grouped by role.

    Every mapping is keyed by column name and holds the column's type object
    as its value.
    """

    # Name and schema of the staging table/view itself.
    stg_name: str
    stg_schema: str

    # Columns whose name starts with "hk_" / "bk_" respectively.
    hk_keys: dict[str, Any]
    bk_keys: dict[str, Any]

    # Well-known system columns, then everything that is left over.
    sys_columns: dict[str, Any]
    bus_columns: dict[str, Any]
|
|
14
|
+
|
|
15
|
+
class Dv2ModelingMetadata:
    """Shared naming conventions and header boilerplate for generated objects."""

    # Comment banner for generated code. It is filled in with ``str.format``;
    # ``created_on`` / ``updated_on`` are rendered with a datetime format spec.
    HEADER_TEMPLATE = """/*
=====================================================================
AUTO-GENERATED CODE — DO NOT EDIT MANUALLY
=====================================================================

This stored procedure was automatically generated by:
sandwich (https://pypi.org/project/sandwich/)

Manual changes are discouraged.
This file may be regenerated at any time, and all manual edits
will be overwritten.

Created on : {created_on:%Y-%m-%d %H:%M:%S}
Updated on : {updated_on:%Y-%m-%d %H:%M:%S}
Generator : sandwich v{version}
Entity name : {entity_name}

=====================================================================
*/
"""

    # Names of the system (audit) columns recognised in staging objects.
    loaddate = "LoadDate"
    recordsource = "RecordSource"
    hashdiff = "HashDiff"
    is_available = "IsAvailable"
    names: list[str] = [loaddate, recordsource, hashdiff, is_available]

    # NOTE: a per-dialect type check for these columns is currently disabled.
    # The configuration it used was:
    #   mssql:    LoadDate DATETIME2, RecordSource VARCHAR(200),
    #             HashDiff CHAR(40),  IsAvailable BIT
    #   postgres: LoadDate TIMESTAMP, RecordSource TEXT,
    #             HashDiff CHAR(40),  IsAvailable BOOLEAN

    def __init__(self):
        # System columns that every staging object has to provide.
        self.required_columns: list[str] = [self.loaddate, self.recordsource]


# Module-level instance shared by the other helpers in this package.
modeling_metadata = Dv2ModelingMetadata()
|
|
63
|
+
|
|
64
|
+
def get_stg_info(stg: Table) -> StgInfo:
    """Classify the columns of a staging table by their modeling role.

    Columns are bucketed by name: ``hk_*`` are hash keys, ``bk_*`` are
    business keys, names listed in ``modeling_metadata.names`` are system
    columns, and every other column is a business column.

    Raises:
        Exception: if a column name starts with ``sg_`` or ``ts_``; those
            prefixes are reserved but not supported yet.
    """
    hash_keys: dict[str, Any] = {}
    business_keys: dict[str, Any] = {}
    system_columns: dict[str, Any] = {}
    business_columns: dict[str, Any] = {}

    key_buckets = {"hk_": hash_keys, "bk_": business_keys}

    for column in stg.columns.values():
        name = column.name
        # ``is None`` matters below: an empty bucket dict is falsy.
        bucket = next(
            (target for prefix, target in key_buckets.items() if name.startswith(prefix)),
            None,
        )
        if bucket is None:
            for reserved in ("sg", "ts"):
                if name.startswith(f"{reserved}_"):
                    raise Exception(f"{reserved} column '{name}' is not implemented yet")
            if name in modeling_metadata.names:
                bucket = system_columns
            else:
                bucket = business_columns
        bucket[name] = column.type

    return StgInfo(
        stg_name=stg.name,
        stg_schema=stg.schema,
        hk_keys=hash_keys,
        bk_keys=business_keys,
        sys_columns=system_columns,
        bus_columns=business_columns,
    )
|
|
95
|
+
|
|
96
|
+
def infer_template(stg_info: StgInfo) -> str:
    """Pick a template name from the number of hash-key columns.

    Returns:
        ``"scd2dim"`` for exactly one hash key, ``"link2fact"`` for more.

    Raises:
        Exception: if the staging object has no hash-key column at all.
    """
    hash_key_total = len(stg_info.hk_keys)
    if hash_key_total == 0:
        raise Exception("hk column is required for `scd2dim` validation")
    return "scd2dim" if hash_key_total == 1 else "link2fact"
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .base import SchemaGenerator, Validator
|
|
2
|
+
from .factory import StrategyFactory
|
|
3
|
+
from .link2fact import Link2FactValidator, Link2FactSchemaGenerator
|
|
4
|
+
from .scd2dim import Scd2DimValidator, Scd2DimSchemaGenerator
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"Validator",
|
|
8
|
+
"SchemaGenerator",
|
|
9
|
+
"StrategyFactory",
|
|
10
|
+
"Scd2DimValidator",
|
|
11
|
+
"Scd2DimSchemaGenerator",
|
|
12
|
+
"Link2FactValidator",
|
|
13
|
+
"Link2FactSchemaGenerator",
|
|
14
|
+
]
|
|
15
|
+
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Any, Tuple
|
|
4
|
+
|
|
5
|
+
from sqlalchemy import Table
|
|
6
|
+
|
|
7
|
+
from src.sandwich.modeling import StgInfo
|
|
8
|
+
|
|
9
|
+
class ValidationResult:
    """Plain value holder for the outcome of a staging validation."""

    def __init__(
        self,
        stg_schema: str,
        entity_name: str,
        bk_keys: list[Tuple[str, Any]],
        hk_key: Tuple[str, Any],
        business_column_types: dict[str, Any],
        system_column_types: dict[str, Any],
    ):
        """Store the given values on the instance unchanged.

        Args:
            stg_schema: Schema of the staging object.
            entity_name: Name of the entity.
            bk_keys: ``(name, type)`` pairs, one per business key.
            hk_key: ``(name, type)`` pair of the hash key.
            business_column_types: Business column name -> type.
            system_column_types: System column name -> type.
        """
        # Identification of the validated object.
        self.stg_schema = stg_schema
        self.entity_name = entity_name
        # Keys.
        self.bk_keys = bk_keys
        self.hk_key = hk_key
        # Remaining columns, grouped by role.
        self.business_column_types = business_column_types
        self.system_column_types = system_column_types
|
|
21
|
+
|
|
22
|
+
class Validator(ABC):
    """Interface implemented by every template-specific staging validator."""

    @abstractmethod
    def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> ValidationResult:
        """Check ``stg_info`` against one template and return the result.

        Concrete subclasses provide the actual rules.
        """
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SchemaGenerator(ABC):
    """Interface implemented by every template-specific schema generator."""

    @property
    @abstractmethod
    def entity_info(self) -> ValidationResult:
        """Validation result this generator works from."""

    @abstractmethod
    def make_tables(self) -> dict[str, Table]:
        """Return the tables to create, keyed by a short role name."""

    # NOTE(review): the default below is evaluated once, when this class body
    # is executed, not on every call. Implementations that copy it get the
    # same frozen timestamp.
    @abstractmethod
    def make_procedures(
        self,
        tables: dict[str, Table],
        entity_registration_date: datetime = datetime.now()
    ) -> dict[str, Tuple[str, str, str]]:
        """Return the generated procedures, keyed by a short role name.

        Each value is a 3-tuple of strings describing one procedure.
        """
|
|
44
|
+
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from src.sandwich.dialects import DialectHandler
|
|
2
|
+
|
|
3
|
+
from .base import Validator, SchemaGenerator, ValidationResult
|
|
4
|
+
from .link2fact import Link2FactValidator, Link2FactSchemaGenerator
|
|
5
|
+
from .scd2dim import Scd2DimValidator, Scd2DimSchemaGenerator
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StrategyFactory:
    """Registry that maps a template name to its validator/generator classes."""

    # template name -> (validator class, generator class)
    _strategies = {
        "scd2dim": (Scd2DimValidator, Scd2DimSchemaGenerator),
        "link2fact": (Link2FactValidator, Link2FactSchemaGenerator),
    }

    @classmethod
    def _resolve(cls, template: str):
        """Return the registered class pair for ``template``.

        Raises:
            ValueError: if no strategy is registered under that name.
        """
        if template in cls._strategies:
            return cls._strategies[template]
        available = ", ".join(cls._strategies)
        raise ValueError(f"Unknown template '{template}'. Available templates: {available}")

    @classmethod
    def register_strategy(cls, template_name: str, validator_class, generator_class):
        """Add a template, or replace the one already stored under that name."""
        cls._strategies[template_name] = (validator_class, generator_class)

    @classmethod
    def create_validator(cls, template: str) -> Validator:
        """Instantiate the validator registered for ``template``.

        Raises:
            ValueError: if the template is unknown.
        """
        validator_class = cls._resolve(template)[0]
        return validator_class()

    @classmethod
    def create_generator(
        cls,
        template: str,
        dialect_handler: DialectHandler,
        validation_result: ValidationResult,
    ) -> SchemaGenerator:
        """Instantiate the schema generator registered for ``template``.

        Raises:
            ValueError: if the template is unknown.
        """
        generator_class = cls._resolve(template)[1]
        return generator_class(dialect_handler, validation_result)

    @classmethod
    def get_available_templates(cls) -> list[str]:
        """Return the registered template names as a new list."""
        return [*cls._strategies]
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Link to Fact strategy implementations."""
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Tuple
|
|
4
|
+
|
|
5
|
+
from sqlalchemy import Table
|
|
6
|
+
|
|
7
|
+
from src.sandwich.dialects.base import DialectHandler
from src.sandwich.modeling import StgInfo
|
|
8
|
+
|
|
9
|
+
from .base import Validator, SchemaGenerator, ValidationResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Link2FactValidator(Validator):
    """Staging validator for the `link2fact` template (still a placeholder)."""

    def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> ValidationResult:
        """Validate staging info for link2fact mode.

        No link2fact-specific rules exist yet, so the result only carries the
        staging schema and name; all key and column fields are empty
        placeholders.

        Raises:
            Exception: if ``verbose`` is requested (not implemented yet).
        """
        if verbose:
            raise Exception("verbose is not implemented yet")

        # TODO: Implement link2fact specific validation logic.
        # This will likely be different from scd2dim validation, for example
        # checking for link keys, fact columns, etc. ValidationResult has no
        # fields for link keys / fact columns yet; it needs extending once
        # column classification for link2fact is implemented.
        return ValidationResult(
            stg_schema=stg_info.stg_schema,
            entity_name=stg_info.stg_name,
            bk_keys=[],
            # Placeholder: a link2fact staging object has several hash keys,
            # so no single key is chosen here.
            hk_key=None,
            business_column_types={},
            system_column_types={},
        )
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Link2FactSchemaGenerator(SchemaGenerator):
    """Schema generator for the `link2fact` template (still a placeholder)."""

    def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
        self.dialect_handler = dialect_handler
        self._validation_result = validation_result

    @property
    def entity_info(self) -> ValidationResult:
        """Validation result this generator was created with."""
        return self._validation_result

    def make_tables(self) -> dict[str, Table]:
        """Create link and fact tables for link2fact mode.

        Not implemented yet: both entries of the returned mapping are
        currently ``None``.
        """
        # TODO: Implement link2fact table creation.
        # This will create different table structures than scd2dim,
        # for example: link table, fact table (instead of hub/sat/dim).
        link_table: Table | None = None
        fact_table: Table | None = None

        return {
            "link": link_table,
            "fact": fact_table,
        }

    def make_procedures(
        self,
        tables: dict[str, Table],
        entity_registration_date: datetime | None = None,
    ) -> dict[str, Tuple[str, str, str]]:
        """Generate procedures for link2fact mode.

        Not implemented yet: always returns an empty mapping.

        Args:
            tables: Tables produced by ``make_tables`` (currently unused).
            entity_registration_date: Optional, so that the method can be
                called the same way as on the other generators (currently
                unused).
        """
        procedures: dict[str, Tuple[str, str, str]] = {}

        # TODO: Implement link2fact procedure generation using dialect_handler.
        # This will generate different procedures than scd2dim, for example:
        # link population, fact population, aggregation logic, etc.
        #
        # When implementing, use self.dialect_handler methods to generate SQL
        # and store 3-tuples like the scd2dim generator does. Example:
        # link_code, link_name, link_call = self.dialect_handler.make_link_proc(...)
        # procedures["link"] = (link_code, link_name, link_call)

        return procedures
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Iterator, Tuple
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import Column, MetaData, Table, UniqueConstraint
|
|
5
|
+
|
|
6
|
+
from src.sandwich import SANDWICH_VERSION
|
|
7
|
+
from src.sandwich.dialects.base import DialectHandler
|
|
8
|
+
from src.sandwich.modeling import modeling_metadata, StgInfo
|
|
9
|
+
|
|
10
|
+
from .base import Validator, SchemaGenerator, ValidationResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Scd2DimValidator(Validator):
    """Staging validator for the `scd2dim` template."""

    def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> ValidationResult:
        """Check that a staging table or view fits the `scd2dim` template.

        Raises:
            Exception: on the first rule that is violated, or when
                ``verbose`` is requested (not implemented yet).
        """
        if verbose:
            raise Exception("verbose is not implemented yet")

        # --- hash key -------------------------------------------------------
        # Exactly one hash key is allowed for this template, and it has to be
        # named `hk_[entity_name]`.
        hash_key_total = len(stg_info.hk_keys)
        if hash_key_total == 0:
            raise Exception("hk column is required for `scd2dim` validation")
        if hash_key_total > 1:
            raise Exception(f"More than one hk column found in stg.{stg_info.stg_name}")
        # (key_name, key_type) of the single hash key
        hk_key = next(iter(stg_info.hk_keys.items()))
        expected_hk_name = f"hk_{stg_info.stg_name}"
        if hk_key[0] != expected_hk_name:
            raise Exception(f"hk column has invalid name '{hk_key[0]}'")

        # --- business keys --------------------------------------------------
        # A hub and/or dim table only makes sense for a business entity, so at
        # least one business key is mandatory (more are fine). By convention a
        # business key is the original key name with a `bk_` prefix, which
        # keeps the original name recoverable.
        if not len(stg_info.bk_keys):
            raise Exception("bk column(s) are required for `scd2dim` validation")

        # --- system columns -------------------------------------------------
        present = stg_info.sys_columns

        # Universal check: every dv2 raw object has to be auditable.
        for audit_column in modeling_metadata.required_columns:
            if audit_column not in present:
                raise Exception(f"{audit_column} column is required")

        # Checks that only apply to scd2dim.
        for scd2_column in (modeling_metadata.hashdiff, modeling_metadata.is_available):
            if scd2_column not in present:
                raise Exception(f"{scd2_column} column is required for scd2dim validation")

        return ValidationResult(
            stg_schema=stg_info.stg_schema,
            entity_name=stg_info.stg_name,
            bk_keys=list(stg_info.bk_keys.items()),
            hk_key=hk_key,
            business_column_types=stg_info.bus_columns,
            system_column_types=stg_info.sys_columns,
        )
|
|
69
|
+
|
|
70
|
+
class Scd2DimSchemaGenerator(SchemaGenerator):
    """Builds the hub / sat / dim tables and their procedures for `scd2dim`."""

    def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
        self.dialect_handler = dialect_handler
        self._validation_result = validation_result

    @property
    def entity_info(self) -> ValidationResult:
        """Validation result this generator was created with."""
        return self._validation_result

    def make_tables(self) -> dict[str, Table]:
        """Create the table definitions for one entity.

        Returns:
            A mapping with the keys ``"hub"``, ``"sat"`` and ``"dim"``. All
            three tables are named after the entity and live in the schema
            of the same name as their key.

        Raises:
            KeyError: if a system column used here is missing from the
                validation result's ``system_column_types``.
        """
        result = self._validation_result
        entity_name = result.entity_name
        bk_keys = result.bk_keys
        hk_key = result.hk_key
        business_column_types = result.business_column_types
        system_column_types = result.system_column_types

        # Every helper returns *new* Column objects on each call, because a
        # Column instance can only be attached to a single Table.
        def bk_columns(**options) -> list[Column]:
            return [Column(bk[0], bk[1], **options) for bk in bk_keys]

        def hk_pk_column() -> Column:
            return Column(hk_key[0], hk_key[1], primary_key=True)

        def system_column(name: str, **options) -> Column:
            return Column(name, system_column_types[name], **options)

        def business_columns() -> list[Column]:
            return [
                Column(col_name, col_type, nullable=True)
                for col_name, col_type in business_column_types.items()
            ]

        def fill(table: Table, columns: list[Column]) -> None:
            for column in columns:
                table.append_column(column)

        # Hub: business keys, hash key as primary key, audit columns.
        hub_table = Table(entity_name, MetaData(), schema="hub")
        fill(hub_table, [
            *bk_columns(nullable=False),
            hk_pk_column(),
            system_column(modeling_metadata.loaddate, nullable=False),
            system_column(modeling_metadata.recordsource, nullable=False),
        ])
        hub_table.append_constraint(UniqueConstraint(*[bk[0] for bk in bk_keys]))

        # Sat: keyed by hash key + load date, carries the business columns.
        sat_table = Table(entity_name, MetaData(), schema="sat")
        fill(sat_table, [
            *bk_columns(nullable=False),
            hk_pk_column(),
            system_column(modeling_metadata.loaddate, primary_key=True),
            system_column(modeling_metadata.recordsource, nullable=False),
            system_column(modeling_metadata.hashdiff, nullable=False),
            *business_columns(),
            system_column(modeling_metadata.is_available, nullable=False),
        ])

        # Dim: keyed by business keys + DateFrom. DateFrom / DateTo reuse the
        # type of the load-date column.
        loaddate_type = system_column_types[modeling_metadata.loaddate]
        dim_table = Table(entity_name, MetaData(), schema="dim")
        fill(dim_table, [
            *bk_columns(primary_key=True),
            *business_columns(),
            system_column(modeling_metadata.is_available, nullable=False),
            Column("IsCurrent", self.dialect_handler.get_boolean_type(), nullable=False),
            Column("DateFrom", loaddate_type, primary_key=True),
            Column("DateTo", loaddate_type, nullable=True),
        ])

        return {
            "hub": hub_table,
            "sat": sat_table,
            "dim": dim_table,
        }

    def make_procedures(
        self,
        tables: dict[str, Table],
        entity_registration_date: datetime | None = None,
    ) -> dict[str, Tuple[str, str, str]]:
        """Generate the procedures for one entity via the dialect handler.

        Args:
            tables: Mapping as returned by ``make_tables``; the ``"hub"``,
                ``"sat"`` and ``"dim"`` entries are used.
            entity_registration_date: Timestamp written as "Created on" in
                the generated header. Defaults to the time of the call.

        Returns:
            A mapping from ``"hub"``, ``"sat"``, ``"dim"``, ``"job"`` and
            ``"drop"`` (plus ``"stg"`` when the staging schema is
            ``"proxy"``) to the ``(code, name, call statement)`` tuple
            returned by the dialect handler.
        """
        result = self._validation_result
        handler = self.dialect_handler

        # Resolve the default at call time. A ``datetime.now()`` default in
        # the signature would be evaluated only once, at import.
        now = datetime.now()
        if entity_registration_date is None:
            entity_registration_date = now

        header = modeling_metadata.HEADER_TEMPLATE.format(
            created_on=entity_registration_date,
            updated_on=now,
            version=SANDWICH_VERSION,
            entity_name=result.entity_name,
        )

        procedures: dict[str, Tuple[str, str, str]] = {}

        # Staging materialization only exists for "proxy" staging objects.
        stg_proc_name = None
        if result.stg_schema == "proxy":
            stg_proc_code, stg_proc_name, stg_call_stmt = handler.make_stg_materialization_proc(
                entity_name=result.entity_name,
                header=header,
            )
            procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)

        # Hub procedure
        hub_proc_code, hub_proc_name, hub_call_stmt = handler.make_hub_proc(
            hub_table=tables["hub"],
            bk_keys=result.bk_keys,
            header=header,
        )
        procedures["hub"] = (hub_proc_code, hub_proc_name, hub_call_stmt)

        # Sat procedure
        sat_proc_code, sat_proc_name, sat_call_stmt = handler.make_sat_proc(
            sat_table=tables["sat"],
            hk_name=result.hk_key[0],
            hashdiff_col=modeling_metadata.hashdiff,
            is_available_col=modeling_metadata.is_available,
            loaddate_col=modeling_metadata.loaddate,
            stg_schema=result.stg_schema,
            header=header,
        )
        procedures["sat"] = (sat_proc_code, sat_proc_name, sat_call_stmt)

        # Dim procedure
        dim_proc_code, dim_proc_name, dim_call_stmt = handler.make_dim_scd2_proc(
            dim_table=tables["dim"],
            bk_keys=result.bk_keys,
            header=header,
        )
        procedures["dim"] = (dim_proc_code, dim_proc_name, dim_call_stmt)

        # Job procedure: receives the names of the procedures built above.
        job_proc_code, job_proc_name, job_call_stmt = handler.make_job_proc(
            entity_name=result.entity_name,
            hub_proc_name=hub_proc_name,
            sat_proc_name=sat_proc_name,
            dim_proc_name=dim_proc_name,
            stg_proc_name=stg_proc_name,
            header=header,
        )
        procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)

        # Drop procedure
        drop_proc_code, drop_proc_name, drop_call_stmt = handler.make_drop_proc(
            entity_name=result.entity_name,
            stg_schema=result.stg_schema,
            job_proc_name=job_proc_name,
            stg_proc_name=stg_proc_name,
            hub_proc_name=hub_proc_name,
            sat_proc_name=sat_proc_name,
            dim_proc_name=dim_proc_name,
            header=header,
        )
        procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)

        return procedures
|