sandwich 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ """Link to Fact strategy implementations."""
2
+ from datetime import datetime
3
+ from typing import Tuple
4
+
5
+ from sqlalchemy import Column, MetaData, Table, UniqueConstraint
6
+
7
+ from sandwich import SANDWICH_VERSION
8
+ from sandwich.dialects.base import DialectHandler
9
+ from sandwich.modeling import modeling_metadata, StgInfo, Dv2SystemInfo
10
+
11
+ from .base import BaseValidator, SchemaGenerator, ValidationResult
12
+
13
+
14
class Link2FactValidator(BaseValidator):
    """Validates staging tables targeted at the `link2fact` template.

    A valid staging table must contain:
      * its own hash key (``hk_<stg_name>``) plus one or two foreign hash
        keys, each foreign key matching a registered entity
        (``hk_<entity_name>``);
      * a degenerate field (required for transactional links);
      * no business-key (``bk_``) columns.
    """

    def __init__(self, template: str):
        super().__init__(template)
        # BaseValidator invokes this hook while validating the staging table.
        self._on_validate_staging = self._validate_staging

    @staticmethod
    def _validate_staging(stg_info: StgInfo, sys_info: Dv2SystemInfo) -> None:
        """Raise an Exception describing the first violation found.

        :param stg_info: metadata of the staging table being validated
        :param sys_info: system-wide info; its ``entities_list`` is used to
            resolve foreign hash keys against registered entities
        :raises Exception: on the first rule violation encountered
        """
        # -----------------
        # hk
        # -----------------
        hk_count = len(stg_info.hk_keys)
        stg_full_name = f"stg.{stg_info.stg_name}"

        # so only exactly 2 or 3 columns allowed right now
        if hk_count < 2:  # own and at least one foreign
            raise Exception(f"At least 2 hk columns expected in `{stg_full_name}` for the `link2fact` template")
        if hk_count > 3:
            raise Exception(f"{hk_count} hk columns in `{stg_full_name}` for the `link2fact` template?! Are you sure?")
        if stg_info.degenerate_field is None:
            raise Exception(f"Degenerate field is required for `{stg_full_name}` for the `link2fact` template")

        # hk_key = (key_name, key_type)
        # list() already returns a fresh list; the extra .copy() was redundant
        unresolved_hks = list(stg_info.hk_keys.keys())
        is_own_hk_found = False
        expected_own_hk_column_name = f"hk_{stg_info.stg_name}"

        for hk_name in stg_info.hk_keys.keys():
            if hk_name == expected_own_hk_column_name:
                unresolved_hks.remove(hk_name)
                is_own_hk_found = True
            else:
                # check that `name` from `hk_[name]` is an existing entity
                for en in sys_info.entities_list:
                    if hk_name == f"hk_{en.entity_name}":
                        unresolved_hks.remove(hk_name)

        if not is_own_hk_found:
            raise Exception(f"Column `{expected_own_hk_column_name}` has not been found in `{stg_full_name}`")

        if len(unresolved_hks) > 0:
            # BUGFIX: the join is extracted to a variable — reusing double
            # quotes inside a double-quoted f-string is a SyntaxError on
            # Python < 3.12 (PEP 701 only allows it from 3.12 on).
            unresolved = ", ".join(unresolved_hks)
            raise Exception(f"There are no entities related to `{unresolved}` columns in the `{stg_full_name}`")

        # -----------------
        # BKs
        # -----------------
        # it should be a warning, not an error
        if len(stg_info.bk_keys) > 0:
            raise Exception("You dont need bk columns for the `link2fact` template")
62
+
63
+
64
class Link2FactSchemaGenerator(SchemaGenerator):
    """Builds Data Vault 2.0 schema objects for the `link2fact` template.

    Produces a link table and its (SCD0) satellite, plus the loader,
    job and drop procedures generated through the configured dialect
    handler.
    """

    def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
        self.dialect_handler = dialect_handler
        self._validation_result = validation_result

    def make_tables(self) -> dict[str, Table]:
        """Return the tables to create, keyed by role ("link", "sat")."""
        return {
            "link": self.make_link_table(),
            "sat": self.make_sat_table(),
            # "fact": self.make_fact_table(),
        }

    def make_link_table(self) -> Table:
        """Build `link.<entity>`: own hk as PK; foreign hks and the
        degenerate field under a unique constraint; system columns."""
        entity_name = self._validation_result.entity_name

        # Create link table
        link_table = Table(entity_name, MetaData(), schema="link")
        uks: list[str] = []

        # HKs (own and FKs)
        for hk_key in self._validation_result.hk_keys:
            if hk_key[0] == f"hk_{entity_name}":
                col = Column(hk_key[0], hk_key[1], primary_key=True)
            else:
                uks.append(hk_key[0])
                col = Column(hk_key[0], hk_key[1], nullable=False)
            link_table.append_column(col)
        dg_key = self._validation_result.degenerate_field
        link_table.append_column(Column(dg_key[0], dg_key[1], nullable=False))
        uks.append(dg_key[0])
        link_table.append_constraint(UniqueConstraint(*uks))

        # LoadDate
        load_date = modeling_metadata.loaddate
        load_date_type = self._validation_result.system_column_types[load_date]
        link_table.append_column(Column(load_date, load_date_type, nullable=False))

        # RecordSource
        record_source = modeling_metadata.recordsource
        record_source_type = self._validation_result.system_column_types[record_source]
        link_table.append_column(Column(record_source, record_source_type, nullable=False))

        return link_table

    def make_sat_table(self) -> Table:
        """Build `sat.<entity>`: own hk as PK, optional degenerate field
        (for transactional links), system and business columns."""
        entity_name = self._validation_result.entity_name

        # Create sat table
        sat_table = Table(entity_name, MetaData(), schema="sat")

        # own HK
        for hk_key in self._validation_result.hk_keys:
            if hk_key[0] == f"hk_{entity_name}":
                sat_table.append_column(Column(hk_key[0], hk_key[1], primary_key=True))
        # for transactional links
        dg_key = self._validation_result.degenerate_field
        if dg_key is not None:
            sat_table.append_column(Column(dg_key[0], dg_key[1], nullable=False))

        # LoadDate
        load_date = modeling_metadata.loaddate
        load_date_type = self._validation_result.system_column_types[load_date]
        sat_table.append_column(Column(load_date, load_date_type, nullable=False))

        # RecordSource
        record_source = modeling_metadata.recordsource
        record_source_type = self._validation_result.system_column_types[record_source]
        sat_table.append_column(Column(record_source, record_source_type, nullable=False))

        for (name_, type_) in self._validation_result.business_column_types.items():
            sat_table.append_column(Column(name_, type_, nullable=True))

        return sat_table

    def make_fact_table(self) -> Table:
        """Build `fact.<entity>`: foreign hks as composite PK plus the
        business columns. Currently unused by :meth:`make_tables`."""
        entity_name = self._validation_result.entity_name

        # Create a fact table
        fact_table = Table(entity_name, MetaData(), schema="fact")

        # not own HKs only
        for hk_key in self._validation_result.hk_keys:
            if hk_key[0] != f"hk_{entity_name}":
                fact_table.append_column(Column(hk_key[0], hk_key[1], primary_key=True))

        for (name_, type_) in self._validation_result.business_column_types.items():
            fact_table.append_column(Column(name_, type_, nullable=True))

        return fact_table

    def make_procedures(self, tables: dict[str, Table],
                        entity_registration_date: "datetime | None" = None) -> dict[str, Tuple[str, str, str]]:
        """Generate loader/cleanup procedures for the entity.

        :param tables: tables produced by :meth:`make_tables`
        :param entity_registration_date: original registration timestamp for
            the generated header; defaults to "now" (evaluated per call)
        :return: mapping of role -> (proc code, proc name, call statement)
        """
        # BUGFIX: the original signature used `datetime.now()` as the default
        # value, which is evaluated once at import time — every call without
        # the argument got the interpreter start-up time. Resolve per call.
        if entity_registration_date is None:
            entity_registration_date = datetime.now()

        procedures = {}

        header = modeling_metadata.HEADER_TEMPLATE.format(
            created_on=entity_registration_date,
            updated_on=datetime.now(),
            version=SANDWICH_VERSION,
            entity_name=self._validation_result.entity_name
        )

        is_proxy_stg = self._validation_result.stg_schema == "proxy"

        stg_proc_name = None
        if is_proxy_stg:
            stg_proc_code, stg_proc_name, stg_call_stmt = self.dialect_handler.make_stg_materialization_proc(
                entity_name=self._validation_result.entity_name,
                header=header
            )
            procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)

        link_table = tables["link"]
        link_proc_code, link_proc_name, link_call_stmt = self.dialect_handler.make_link_proc(
            link_table=link_table,
            hk_keys=self._validation_result.hk_keys + [self._validation_result.degenerate_field],
            header=header
        )
        procedures["link"] = (link_proc_code, link_proc_name, link_call_stmt)

        sat_table = tables["sat"]
        sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_scd0_sat_proc(
            sat_table=sat_table,
            header=header
        )
        procedures["sat"] = (sat_proc_code, sat_proc_name, sat_call_stmt)

        # job procedure
        job_proc_names = []  # order-sensitive
        if is_proxy_stg:
            job_proc_names.append(stg_proc_name)
        job_proc_names.extend([link_proc_name, sat_proc_name])
        job_proc_code, job_proc_name, job_call_stmt = self.dialect_handler.make_job_proc(
            entity_name=self._validation_result.entity_name,
            proc_names=job_proc_names,
            header=header
        )
        procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)

        # drop procedure
        drop_table_schemas = ["link", "sat"]
        # CONSISTENCY FIX: include the job procedure in the drop list — the
        # scd2dim generator already does this; previously the job procedure
        # was left behind after a drop.
        drop_proc_names = [job_proc_name, link_proc_name, sat_proc_name]
        if is_proxy_stg:
            drop_table_schemas.append("stg")
            drop_proc_names.append(stg_proc_name)
        drop_proc_code, drop_proc_name, drop_call_stmt = self.dialect_handler.make_drop_proc(
            entity_name=self._validation_result.entity_name,
            table_schemas=drop_table_schemas,
            procedures=drop_proc_names,
            header=header
        )
        procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)

        return procedures
@@ -0,0 +1,228 @@
1
+ from datetime import datetime
2
+ from typing import Iterator, Tuple
3
+
4
+ from sqlalchemy import Column, MetaData, Table, UniqueConstraint
5
+
6
+ from sandwich import SANDWICH_VERSION
7
+ from sandwich.dialects.base import DialectHandler
8
+ from sandwich.modeling import modeling_metadata, StgInfo, Dv2SystemInfo
9
+
10
+ from .base import BaseValidator, SchemaGenerator, ValidationResult
11
+
12
+
13
class Scd2DimValidator(BaseValidator):
    """Validates staging tables targeted at the `scd2dim` template.

    Requirements enforced:
      * exactly one hash key, named ``hk_<stg_name>``;
      * at least one business key (``bk_`` columns carry the original
        key names for hub/dim generation);
      * the hashdiff and is-available system columns must be present.
    """

    def __init__(self, template: str):
        super().__init__(template)
        # Hook picked up by BaseValidator during staging validation.
        self._on_validate_staging = self._validate_staging

    @staticmethod
    def _validate_staging(stg_info: StgInfo, _: Dv2SystemInfo) -> None:
        """Raise an Exception on the first `scd2dim` rule violation."""
        # -----------------
        # hk
        # -----------------
        # Exactly one hash key is allowed for the `scd2dim` profile and its
        # name must follow the `hk_[entity_name]` pattern.
        hk_total = len(stg_info.hk_keys)
        if hk_total == 0:
            raise Exception("hk column is required for `scd2dim` validation")
        if hk_total > 1:
            raise Exception(f"More than one hk column found in stg.{stg_info.stg_name}")

        # (key_name, key_type) pair of the single hash key
        hk_name, _hk_type = next(iter(stg_info.hk_keys.items()))
        if hk_name != f"hk_{stg_info.stg_name}":
            raise Exception(f"hk column has invalid name '{hk_name}'")

        # -----------------
        # BKs
        # -----------------
        # A hub and/or dim table makes no sense for a non-business entity,
        # so at least one business key is mandatory. The naming convention
        # prefixes the original key name with `bk_` so the source names
        # survive into the model.
        if not stg_info.bk_keys:
            raise Exception("bk column(s) are required for `scd2dim` validation")

        # -----------------
        # system columns
        # -----------------
        present_sys_columns = stg_info.sys_columns.keys()
        for required in (modeling_metadata.hashdiff, modeling_metadata.is_available):
            if required not in present_sys_columns:
                raise Exception(f"{required} column is required for scd2dim validation")
52
+
53
class Scd2DimSchemaGenerator(SchemaGenerator):
    """Builds hub, SCD2 satellite and dimension tables (plus loader,
    job and drop procedures) for the `scd2dim` template."""

    def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
        self.dialect_handler = dialect_handler
        self._validation_result = validation_result

    def make_tables(self) -> dict[str, Table]:
        """Return the tables to create, keyed by role ("hub", "sat", "dim")."""
        entity_name = self._validation_result.entity_name
        bk_keys = self._validation_result.bk_keys
        hk_key = self._validation_result.hk_keys[0]
        business_column_types = self._validation_result.business_column_types
        system_column_types = self._validation_result.system_column_types

        # Helper functions for creating columns. Each call builds fresh
        # Column objects — a Column instance cannot be shared across tables.
        def get_bk_columns() -> Iterator[Column]:
            return (Column(bk_key[0], bk_key[1], nullable=False) for bk_key in bk_keys)

        def get_bk_pk_columns() -> Iterator[Column]:
            return (Column(bk_key[0], bk_key[1], primary_key=True) for bk_key in bk_keys)

        def get_hk_pk_column() -> Column:
            return Column(hk_key[0], hk_key[1], primary_key=True)

        def get_loaddate_column() -> Column:
            _load_date = modeling_metadata.loaddate
            _load_date_type = system_column_types[_load_date]
            return Column(_load_date, _load_date_type, nullable=False)

        def get_loaddate_pk_column() -> Column:
            _load_date = modeling_metadata.loaddate
            _load_date_type = system_column_types[_load_date]
            return Column(_load_date, _load_date_type, primary_key=True)

        def get_datefrom_pk_column() -> Column:
            # DateFrom reuses the loaddate type so SCD2 interval bounds and
            # load timestamps stay comparable.
            _load_date = modeling_metadata.loaddate
            _load_date_type = system_column_types[_load_date]
            return Column("DateFrom", _load_date_type, primary_key=True)

        def get_dateto_column() -> Column:
            # Nullable: NULL DateTo marks the currently open version.
            _load_date = modeling_metadata.loaddate
            _load_date_type = system_column_types[_load_date]
            return Column("DateTo", _load_date_type, nullable=True)

        def get_recordsource_column() -> Column:
            _record_source = modeling_metadata.recordsource
            _record_source_type = system_column_types[_record_source]
            return Column(_record_source, _record_source_type, nullable=False)

        def get_business_columns() -> Iterator[Column]:
            return (Column(col_name, col_type, nullable=True) for (col_name, col_type) in business_column_types.items())

        def get_is_available_column() -> Column:
            _is_available = modeling_metadata.is_available
            _is_available_type = system_column_types[_is_available]
            return Column(_is_available, _is_available_type, nullable=False)

        def get_hashdiff_column() -> Column:
            _hashdiff = modeling_metadata.hashdiff
            _hashdiff_type = system_column_types[_hashdiff]
            return Column(_hashdiff, _hashdiff_type, nullable=False)

        # Create hub table: BKs (unique) + hk PK + system columns
        hub_table = Table(entity_name, MetaData(), schema="hub")
        for bk_col in get_bk_columns():
            hub_table.append_column(bk_col)
        hub_table.append_column(get_hk_pk_column())
        hub_table.append_column(get_loaddate_column())
        hub_table.append_column(get_recordsource_column())
        hub_table.append_constraint(UniqueConstraint(*[bk[0] for bk in bk_keys]))

        # Create sat table: (hk, loaddate) composite PK + hashdiff + payload
        sat_table = Table(entity_name, MetaData(), schema="sat")
        for bk_col in get_bk_columns():
            sat_table.append_column(bk_col)
        sat_table.append_column(get_hk_pk_column())
        sat_table.append_column(get_loaddate_pk_column())
        sat_table.append_column(get_recordsource_column())
        sat_table.append_column(get_hashdiff_column())
        for business_col in get_business_columns():
            sat_table.append_column(business_col)
        sat_table.append_column(get_is_available_column())

        # Create dim table: BKs + DateFrom as composite PK, SCD2 interval cols
        dim_table = Table(entity_name, MetaData(), schema="dim")
        for bk_col in get_bk_pk_columns():
            dim_table.append_column(bk_col)
        for business_col in get_business_columns():
            dim_table.append_column(business_col)
        dim_table.append_column(get_is_available_column())
        dim_table.append_column(Column("IsCurrent", self.dialect_handler.get_boolean_type(), nullable=False))
        dim_table.append_column(get_datefrom_pk_column())
        dim_table.append_column(get_dateto_column())

        return {
            "hub": hub_table,
            "sat": sat_table,
            "dim": dim_table,
        }

    def make_procedures(self, tables: dict[str, Table],
                        entity_registration_date: "datetime | None" = None) -> dict[str, Tuple[str, str, str]]:
        """Generate loader/cleanup procedures for the entity.

        :param tables: tables produced by :meth:`make_tables`
        :param entity_registration_date: original registration timestamp for
            the generated header; defaults to "now" (evaluated per call)
        :return: mapping of role -> (proc code, proc name, call statement)
        """
        # BUGFIX: the original signature used `datetime.now()` as the default
        # value, which is evaluated once at import time — every call without
        # the argument got the interpreter start-up time. Resolve per call.
        if entity_registration_date is None:
            entity_registration_date = datetime.now()

        procedures = {}

        header = modeling_metadata.HEADER_TEMPLATE.format(
            created_on=entity_registration_date,
            updated_on=datetime.now(),
            version=SANDWICH_VERSION,
            entity_name=self._validation_result.entity_name
        )

        is_proxy_stg = self._validation_result.stg_schema == "proxy"

        stg_proc_name = None
        if is_proxy_stg:
            stg_proc_code, stg_proc_name, stg_call_stmt = self.dialect_handler.make_stg_materialization_proc(
                entity_name=self._validation_result.entity_name,
                header=header
            )
            procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)

        # Generate hub procedure
        hub_table = tables["hub"]
        hub_proc_code, hub_proc_name, hub_call_stmt = self.dialect_handler.make_hub_proc(
            hub_table=hub_table,
            bk_keys=self._validation_result.bk_keys,
            header=header
        )
        procedures["hub"] = (hub_proc_code, hub_proc_name, hub_call_stmt)

        # Generate sat procedure
        sat_table = tables["sat"]
        sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_scd2_sat_proc(
            sat_table=sat_table,
            hk_name=self._validation_result.hk_keys[0][0],
            hashdiff_col=modeling_metadata.hashdiff,
            is_available_col=modeling_metadata.is_available,
            loaddate_col=modeling_metadata.loaddate,
            stg_schema=self._validation_result.stg_schema,
            header=header
        )
        procedures["sat"] = (sat_proc_code, sat_proc_name, sat_call_stmt)

        # Generate dim procedure
        dim_table = tables["dim"]
        dim_proc_code, dim_proc_name, dim_call_stmt = self.dialect_handler.make_scd2_dim_proc(
            dim_table=dim_table,
            bk_keys=self._validation_result.bk_keys,
            header=header
        )
        procedures["dim"] = (dim_proc_code, dim_proc_name, dim_call_stmt)

        # Generate job procedure
        job_proc_names = []  # order-sensitive
        if is_proxy_stg:
            job_proc_names.append(stg_proc_name)
        job_proc_names.extend([hub_proc_name, sat_proc_name, dim_proc_name])
        job_proc_code, job_proc_name, job_call_stmt = self.dialect_handler.make_job_proc(
            entity_name=self._validation_result.entity_name,
            proc_names=job_proc_names,
            header=header
        )
        procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)

        # Generate drop procedure
        drop_table_schemas = ["hub", "sat", "dim"]
        drop_proc_names = [job_proc_name, hub_proc_name, sat_proc_name, dim_proc_name]
        if is_proxy_stg:
            drop_table_schemas.append("stg")
            drop_proc_names.append(stg_proc_name)
        drop_proc_code, drop_proc_name, drop_call_stmt = self.dialect_handler.make_drop_proc(
            entity_name=self._validation_result.entity_name,
            table_schemas=drop_table_schemas,
            procedures=drop_proc_names,
            header=header
        )
        procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)

        return procedures