sandwich 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sandwich/__init__.py +3 -0
- sandwich/dialects/__init__.py +12 -0
- sandwich/dialects/base.py +136 -0
- sandwich/dialects/ddl_mssql.py +123 -0
- sandwich/dialects/ddl_postgres.py +114 -0
- sandwich/dialects/factory.py +27 -0
- sandwich/dialects/mssql.py +281 -0
- sandwich/dialects/postgres.py +107 -0
- sandwich/dialects/utils.py +147 -0
- sandwich/dwh/__init__.py +82 -0
- sandwich/errors.py +25 -0
- sandwich/main.py +0 -0
- sandwich/modeling/__init__.py +120 -0
- sandwich/modeling/strategies/__init__.py +15 -0
- sandwich/modeling/strategies/base.py +94 -0
- sandwich/modeling/strategies/factory.py +39 -0
- sandwich/modeling/strategies/link2fact.py +225 -0
- sandwich/modeling/strategies/scd2dim.py +228 -0
- {sandwich-0.2.1.dist-info → sandwich-0.3.0.dist-info}/METADATA +170 -155
- sandwich-0.3.0.dist-info/RECORD +23 -0
- sandwich-0.3.0.dist-info/WHEEL +4 -0
- sandwich-0.3.0.dist-info/entry_points.txt +3 -0
- sandwich-0.2.1.dist-info/RECORD +0 -5
- sandwich-0.2.1.dist-info/WHEEL +0 -4
- sandwich-0.2.1.dist-info/licenses/LICENSE +0 -9
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""Link to Fact strategy implementations."""
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Tuple
|
|
4
|
+
|
|
5
|
+
from sqlalchemy import Column, MetaData, Table, UniqueConstraint
|
|
6
|
+
|
|
7
|
+
from sandwich import SANDWICH_VERSION
|
|
8
|
+
from sandwich.dialects.base import DialectHandler
|
|
9
|
+
from sandwich.modeling import modeling_metadata, StgInfo, Dv2SystemInfo
|
|
10
|
+
|
|
11
|
+
from .base import BaseValidator, SchemaGenerator, ValidationResult
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Link2FactValidator(BaseValidator):
    """Validates staging metadata for the `link2fact` template.

    A valid staging table has its own hash key (`hk_<stg_name>`), one or two
    foreign hash keys that each reference a registered entity, a degenerate
    field, and no business-key columns.
    """

    def __init__(self, template: str):
        super().__init__(template)
        # Hook consumed by BaseValidator during staging validation.
        self._on_validate_staging = self._validate_staging

    @staticmethod
    def _validate_staging(stg_info: StgInfo, sys_info: Dv2SystemInfo) -> None:
        """Raise an Exception describing the first `link2fact` rule broken.

        Checks, in order: hash-key count (2 or 3), presence of a degenerate
        field, own/foreign hash-key naming against registered entities, and
        absence of business-key columns.
        """
        # -----------------
        # hk
        # -----------------
        hk_count = len(stg_info.hk_keys)
        stg_full_name = f"stg.{stg_info.stg_name}"

        # so only exactly 2 or 3 columns allowed right now
        if hk_count < 2:  # own and at least one foreign
            raise Exception(f"At least 2 hk columns expected in `{stg_full_name}` for the `link2fact` template")
        if hk_count > 3:
            raise Exception(f"{hk_count} hk columns in `{stg_full_name}` for the `link2fact` template?! Are you sure?")
        if stg_info.degenerate_field is None:
            raise Exception(f"Degenerate field is required for `{stg_full_name}` for the `link2fact` template")

        # hk_key = (key_name, key_type)
        unmatched_hks = list(stg_info.hk_keys.keys())
        is_own_hk_found = False
        expected_own_hk_column_name = f"hk_{stg_info.stg_name}"

        for hk_name in stg_info.hk_keys.keys():
            if hk_name == expected_own_hk_column_name:
                unmatched_hks.remove(hk_name)
                is_own_hk_found = True
            else:
                # check that `name` from `hk_[name]` is an existing entity
                for en in sys_info.entities_list:
                    if hk_name == f"hk_{en.entity_name}":
                        unmatched_hks.remove(hk_name)
                        # BUGFIX: stop after the first match; a second match would
                        # call .remove() on an already-removed name (ValueError).
                        break

        if not is_own_hk_found:
            raise Exception(f"Column `{expected_own_hk_column_name}` has not been found in `{stg_full_name}`")

        if len(unmatched_hks) > 0:
            # BUGFIX: the original nested double quotes inside a double-quoted
            # f-string, which is a SyntaxError on Python < 3.12; join first.
            unmatched_list = ", ".join(unmatched_hks)
            raise Exception(f"There are no entities related to `{unmatched_list}` columns in the `{stg_full_name}`")

        # -----------------
        # BKs
        # -----------------
        # it should be a warning, not an error
        if len(stg_info.bk_keys) > 0:
            raise Exception("You dont need bk columns for the `link2fact` template")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Link2FactSchemaGenerator(SchemaGenerator):
    """Generates the DWH tables and loading procedures for the `link2fact` template."""

    def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
        self.dialect_handler = dialect_handler
        self._validation_result = validation_result

    def make_tables(self) -> dict[str, Table]:
        """Return the tables this template materializes, keyed by role."""
        return {
            "link": self.make_link_table(),
            "sat": self.make_sat_table(),
            #"fact": self.make_fact_table(),
        }

    def make_link_table(self) -> Table:
        """Build the `link.<entity>` table.

        Own HK is the primary key; foreign HKs plus the degenerate field are
        covered by a unique constraint; LoadDate/RecordSource system columns
        are appended last.
        """
        entity_name = self._validation_result.entity_name

        # Create link table
        link_table = Table(entity_name, MetaData(), schema="link")
        uks: list[str] = []

        # HKs (own and FKs)
        for hk_key in self._validation_result.hk_keys:
            if hk_key[0] == f"hk_{entity_name}":
                col = Column(hk_key[0], hk_key[1], primary_key=True)
            else:
                uks.append(hk_key[0])
                col = Column(hk_key[0], hk_key[1], nullable=False)
            link_table.append_column(col)

        # Degenerate field participates in the unique constraint as well.
        dg_key = self._validation_result.degenerate_field
        link_table.append_column(Column(dg_key[0], dg_key[1], nullable=False))
        uks.append(dg_key[0])
        link_table.append_constraint(UniqueConstraint(*uks))

        # LoadDate
        load_date = modeling_metadata.loaddate
        load_date_type = self._validation_result.system_column_types[load_date]
        link_table.append_column(Column(load_date, load_date_type, nullable=False))

        # RecordSource
        record_source = modeling_metadata.recordsource
        record_source_type = self._validation_result.system_column_types[record_source]
        link_table.append_column(Column(record_source, record_source_type, nullable=False))

        return link_table

    def make_sat_table(self) -> Table:
        """Build the `sat.<entity>` table: own HK as PK, optional degenerate
        field, system columns, then the nullable business columns."""
        entity_name = self._validation_result.entity_name

        # Create sat table
        sat_table = Table(entity_name, MetaData(), schema="sat")

        # own HK
        for hk_key in self._validation_result.hk_keys:
            if hk_key[0] == f"hk_{entity_name}":
                sat_table.append_column(Column(hk_key[0], hk_key[1], primary_key=True))

        # for transactional links
        dg_key = self._validation_result.degenerate_field
        if dg_key is not None:
            sat_table.append_column(Column(dg_key[0], dg_key[1], nullable=False))

        # LoadDate
        load_date = modeling_metadata.loaddate
        load_date_type = self._validation_result.system_column_types[load_date]
        sat_table.append_column(Column(load_date, load_date_type, nullable=False))

        # RecordSource
        record_source = modeling_metadata.recordsource
        record_source_type = self._validation_result.system_column_types[record_source]
        sat_table.append_column(Column(record_source, record_source_type, nullable=False))

        for (name_, type_) in self._validation_result.business_column_types.items():
            sat_table.append_column(Column(name_, type_, nullable=True))

        return sat_table

    def make_fact_table(self) -> Table:
        """Build the `fact.<entity>` table (currently unused by make_tables):
        foreign HKs as a composite PK plus the nullable business columns."""
        entity_name = self._validation_result.entity_name

        # Create a fact table
        fact_table = Table(entity_name, MetaData(), schema="fact")

        # not own HKs only
        for hk_key in self._validation_result.hk_keys:
            if hk_key[0] != f"hk_{entity_name}":
                fact_table.append_column(Column(hk_key[0], hk_key[1], primary_key=True))

        for (name_, type_) in self._validation_result.business_column_types.items():
            fact_table.append_column(Column(name_, type_, nullable=True))

        return fact_table

    def make_procedures(self, tables: dict[str, Table]
                        , entity_registration_date: datetime | None = None) -> dict[str, Tuple[str, str, str]]:
        """Generate loading/maintenance procedures for the entity.

        :param tables: tables produced by :meth:`make_tables` ("link", "sat").
        :param entity_registration_date: creation timestamp written into the
            procedure headers; defaults to the current time.
        :return: role -> (procedure code, procedure name, call statement).
        """
        # BUGFIX: the default was previously `datetime.now()` in the signature,
        # evaluated once at import time; resolve it at call time instead.
        if entity_registration_date is None:
            entity_registration_date = datetime.now()

        procedures = {}
        is_proxy = self._validation_result.stg_schema == "proxy"

        header = modeling_metadata.HEADER_TEMPLATE.format(
            created_on=entity_registration_date,
            updated_on=datetime.now(),
            version=SANDWICH_VERSION,
            entity_name=self._validation_result.entity_name
        )

        stg_proc_name = None
        if is_proxy:
            stg_proc_code, stg_proc_name, stg_call_stmt = self.dialect_handler.make_stg_materialization_proc(
                entity_name=self._validation_result.entity_name,
                header=header
            )
            procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)

        link_proc_code, link_proc_name, link_call_stmt = self.dialect_handler.make_link_proc(
            link_table=tables["link"],
            hk_keys=self._validation_result.hk_keys + [self._validation_result.degenerate_field],
            header=header
        )
        procedures["link"] = (link_proc_code, link_proc_name, link_call_stmt)

        sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_scd0_sat_proc(
            sat_table=tables["sat"],
            header=header
        )
        procedures["sat"] = (sat_proc_code, sat_proc_name, sat_call_stmt)

        # job procedure
        job_proc_names = []  # order-sensitive
        if is_proxy:
            job_proc_names.append(stg_proc_name)
        job_proc_names.extend([link_proc_name, sat_proc_name])
        job_proc_code, job_proc_name, job_call_stmt = self.dialect_handler.make_job_proc(
            entity_name=self._validation_result.entity_name,
            proc_names=job_proc_names,
            header=header
        )
        procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)

        # drop procedure
        # NOTE(review): unlike the `scd2dim` strategy, the job procedure itself
        # is not in the drop list here — confirm this asymmetry is intentional.
        drop_table_schemas = ["link", "sat"]
        drop_proc_names = [link_proc_name, sat_proc_name]
        if is_proxy:
            drop_table_schemas.append("stg")
            drop_proc_names.append(stg_proc_name)
        drop_proc_code, drop_proc_name, drop_call_stmt = self.dialect_handler.make_drop_proc(
            entity_name=self._validation_result.entity_name,
            table_schemas=drop_table_schemas,
            procedures=drop_proc_names,
            header=header
        )
        procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)

        return procedures
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Iterator, Tuple
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import Column, MetaData, Table, UniqueConstraint
|
|
5
|
+
|
|
6
|
+
from sandwich import SANDWICH_VERSION
|
|
7
|
+
from sandwich.dialects.base import DialectHandler
|
|
8
|
+
from sandwich.modeling import modeling_metadata, StgInfo, Dv2SystemInfo
|
|
9
|
+
|
|
10
|
+
from .base import BaseValidator, SchemaGenerator, ValidationResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Scd2DimValidator(BaseValidator):
    """Validates staging metadata for the `scd2dim` template."""

    def __init__(self, template: str):
        super().__init__(template)
        # Hook consumed by BaseValidator during staging validation.
        self._on_validate_staging = self._validate_staging

    @staticmethod
    def _validate_staging(stg_info: StgInfo, _: Dv2SystemInfo) -> None:
        """Raise an Exception on the first `scd2dim` rule the staging table breaks."""
        # -----------------
        # hk
        # -----------------
        # Exactly one hash key is allowed for the `scd2dim` profile, and its
        # name must follow the `hk_[entity_name]` pattern.
        n_hash_keys = len(stg_info.hk_keys)
        if n_hash_keys == 0:
            raise Exception("hk column is required for `scd2dim` validation")
        elif n_hash_keys > 1:
            raise Exception(f"More than one hk column found in stg.{stg_info.stg_name}")
        # The single (key_name, key_type) pair.
        hk_name, _hk_type = next(iter(stg_info.hk_keys.items()))
        if hk_name != f"hk_{stg_info.stg_name}":
            raise Exception(f"hk column has invalid name '{hk_name}'")

        # -----------------
        # BKs
        # -----------------
        # You don't need a hub or/and a dim tables for a non-business entity.
        # So you have to have at least one business key, and you can have more.
        # Naming convention is to just add a `bk_` prefix to the original key name
        # because we want to keep information of the original names.
        if not stg_info.bk_keys:
            raise Exception("bk column(s) are required for `scd2dim` validation")

        sys_names = stg_info.sys_columns.keys()

        if modeling_metadata.hashdiff not in sys_names:
            raise Exception(f"{modeling_metadata.hashdiff} column is required for scd2dim validation")
        if modeling_metadata.is_available not in sys_names:
            raise Exception(f"{modeling_metadata.is_available} column is required for scd2dim validation")
|
|
52
|
+
|
|
53
|
+
class Scd2DimSchemaGenerator(SchemaGenerator):
    """Generates hub/sat/dim tables and loading procedures for the `scd2dim` template."""

    def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
        self.dialect_handler = dialect_handler
        self._validation_result = validation_result

    def make_tables(self) -> dict[str, Table]:
        """Build the hub, satellite (SCD2 history) and dimension tables.

        :return: mapping with keys "hub", "sat", "dim".
        """
        entity_name = self._validation_result.entity_name
        bk_keys = self._validation_result.bk_keys
        hk_key = self._validation_result.hk_keys[0]
        business_column_types = self._validation_result.business_column_types
        system_column_types = self._validation_result.system_column_types

        # Helper functions for creating columns
        def get_bk_columns() -> Iterator[Column]:
            return (Column(bk_key[0], bk_key[1], nullable=False) for bk_key in bk_keys)

        def get_bk_pk_columns() -> Iterator[Column]:
            return (Column(bk_key[0], bk_key[1], primary_key=True) for bk_key in bk_keys)

        def get_hk_pk_column() -> Column:
            return Column(hk_key[0], hk_key[1], primary_key=True)

        def get_loaddate_type():
            # LoadDate, DateFrom and DateTo all share the LoadDate column type
            # (deduplicates four identical lookups from the original).
            return system_column_types[modeling_metadata.loaddate]

        def get_loaddate_column() -> Column:
            return Column(modeling_metadata.loaddate, get_loaddate_type(), nullable=False)

        def get_loaddate_pk_column() -> Column:
            return Column(modeling_metadata.loaddate, get_loaddate_type(), primary_key=True)

        def get_datefrom_pk_column() -> Column:
            return Column("DateFrom", get_loaddate_type(), primary_key=True)

        def get_dateto_column() -> Column:
            # Nullable — presumably NULL marks the open-ended current row; confirm.
            return Column("DateTo", get_loaddate_type(), nullable=True)

        def get_recordsource_column() -> Column:
            _record_source = modeling_metadata.recordsource
            return Column(_record_source, system_column_types[_record_source], nullable=False)

        def get_business_columns() -> Iterator[Column]:
            return (Column(col_name, col_type, nullable=True) for (col_name, col_type) in business_column_types.items())

        def get_is_available_column() -> Column:
            _is_available = modeling_metadata.is_available
            return Column(_is_available, system_column_types[_is_available], nullable=False)

        def get_hashdiff_column() -> Column:
            _hashdiff = modeling_metadata.hashdiff
            return Column(_hashdiff, system_column_types[_hashdiff], nullable=False)

        # Create hub table
        hub_table = Table(entity_name, MetaData(), schema="hub")
        for bk_col in get_bk_columns():
            hub_table.append_column(bk_col)
        hub_table.append_column(get_hk_pk_column())
        hub_table.append_column(get_loaddate_column())
        hub_table.append_column(get_recordsource_column())
        hub_table.append_constraint(UniqueConstraint(*[bk[0] for bk in bk_keys]))

        # Create sat table
        sat_table = Table(entity_name, MetaData(), schema="sat")
        for bk_col in get_bk_columns():
            sat_table.append_column(bk_col)
        sat_table.append_column(get_hk_pk_column())
        sat_table.append_column(get_loaddate_pk_column())
        sat_table.append_column(get_recordsource_column())
        sat_table.append_column(get_hashdiff_column())
        for business_col in get_business_columns():
            sat_table.append_column(business_col)
        sat_table.append_column(get_is_available_column())

        # Create dim table
        dim_table = Table(entity_name, MetaData(), schema="dim")
        for bk_col in get_bk_pk_columns():
            dim_table.append_column(bk_col)
        for business_col in get_business_columns():
            dim_table.append_column(business_col)
        dim_table.append_column(get_is_available_column())
        dim_table.append_column(Column("IsCurrent", self.dialect_handler.get_boolean_type(), nullable=False))
        dim_table.append_column(get_datefrom_pk_column())
        dim_table.append_column(get_dateto_column())

        return {
            "hub": hub_table,
            "sat": sat_table,
            "dim": dim_table,
        }

    def make_procedures(self, tables: dict[str, Table]
                        , entity_registration_date: datetime | None = None) -> dict[str, Tuple[str, str, str]]:
        """Generate loading/maintenance procedures for the entity.

        :param tables: tables produced by :meth:`make_tables` ("hub", "sat", "dim").
        :param entity_registration_date: creation timestamp written into the
            procedure headers; defaults to the current time.
        :return: role -> (procedure code, procedure name, call statement).
        """
        # BUGFIX: the default was previously `datetime.now()` in the signature,
        # evaluated once at import time; resolve it at call time instead.
        if entity_registration_date is None:
            entity_registration_date = datetime.now()

        procedures = {}
        is_proxy = self._validation_result.stg_schema == "proxy"

        header = modeling_metadata.HEADER_TEMPLATE.format(
            created_on=entity_registration_date,
            updated_on=datetime.now(),
            version=SANDWICH_VERSION,
            entity_name=self._validation_result.entity_name
        )

        stg_proc_name = None
        if is_proxy:
            stg_proc_code, stg_proc_name, stg_call_stmt = self.dialect_handler.make_stg_materialization_proc(
                entity_name=self._validation_result.entity_name,
                header=header
            )
            procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)

        hub_proc_code, hub_proc_name, hub_call_stmt = self.dialect_handler.make_hub_proc(
            hub_table=tables["hub"],
            bk_keys=self._validation_result.bk_keys,
            header=header
        )
        procedures["hub"] = (hub_proc_code, hub_proc_name, hub_call_stmt)

        # Generate sat procedure
        sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_scd2_sat_proc(
            sat_table=tables["sat"],
            hk_name=self._validation_result.hk_keys[0][0],
            hashdiff_col=modeling_metadata.hashdiff,
            is_available_col=modeling_metadata.is_available,
            loaddate_col=modeling_metadata.loaddate,
            stg_schema=self._validation_result.stg_schema,
            header=header
        )
        procedures["sat"] = (sat_proc_code, sat_proc_name, sat_call_stmt)

        # Generate dim procedure
        dim_proc_code, dim_proc_name, dim_call_stmt = self.dialect_handler.make_scd2_dim_proc(
            dim_table=tables["dim"],
            bk_keys=self._validation_result.bk_keys,
            header=header
        )
        procedures["dim"] = (dim_proc_code, dim_proc_name, dim_call_stmt)

        # Generate job procedure
        job_proc_names = []  # order-sensitive
        if is_proxy:
            job_proc_names.append(stg_proc_name)
        job_proc_names.extend([hub_proc_name, sat_proc_name, dim_proc_name])
        job_proc_code, job_proc_name, job_call_stmt = self.dialect_handler.make_job_proc(
            entity_name=self._validation_result.entity_name,
            proc_names=job_proc_names,
            header=header
        )
        procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)

        # Generate drop procedure
        drop_table_schemas = ["hub", "sat", "dim"]
        drop_proc_names = [job_proc_name, hub_proc_name, sat_proc_name, dim_proc_name]
        if is_proxy:
            drop_table_schemas.append("stg")
            drop_proc_names.append(stg_proc_name)
        drop_proc_code, drop_proc_name, drop_call_stmt = self.dialect_handler.make_drop_proc(
            entity_name=self._validation_result.entity_name,
            table_schemas=drop_table_schemas,
            procedures=drop_proc_names,
            header=header
        )
        procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)

        return procedures
|