sandwich 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sandwich/dialects/base.py +18 -48
- sandwich/dialects/ddl_mssql.py +2 -27
- sandwich/dialects/ddl_postgres.py +0 -18
- sandwich/dialects/mssql.py +65 -55
- sandwich/dialects/postgres.py +3 -4
- sandwich/dialects/utils.py +1 -3
- sandwich/dwh/__init__.py +82 -0
- sandwich/modeling/__init__.py +26 -9
- sandwich/modeling/strategies/base.py +94 -0
- sandwich/{strategies → modeling/strategies}/factory.py +3 -2
- sandwich/modeling/strategies/link2fact.py +225 -0
- sandwich/{strategies → modeling/strategies}/scd2dim.py +27 -45
- {sandwich-0.2.2.dist-info → sandwich-0.3.0.dist-info}/METADATA +17 -15
- sandwich-0.3.0.dist-info/RECORD +23 -0
- sandwich/dv2_helper.py +0 -98
- sandwich/strategies/base.py +0 -44
- sandwich/strategies/link2fact.py +0 -91
- sandwich-0.2.2.dist-info/RECORD +0 -23
- /sandwich/{strategies → modeling/strategies}/__init__.py +0 -0
- {sandwich-0.2.2.dist-info → sandwich-0.3.0.dist-info}/WHEEL +0 -0
- {sandwich-0.2.2.dist-info → sandwich-0.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""Link to Fact strategy implementations."""
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from typing import Tuple
|
|
4
|
+
|
|
5
|
+
from sqlalchemy import Column, MetaData, Table, UniqueConstraint
|
|
6
|
+
|
|
7
|
+
from sandwich import SANDWICH_VERSION
|
|
8
|
+
from sandwich.dialects.base import DialectHandler
|
|
9
|
+
from sandwich.modeling import modeling_metadata, StgInfo, Dv2SystemInfo
|
|
10
|
+
|
|
11
|
+
from .base import BaseValidator, SchemaGenerator, ValidationResult
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Link2FactValidator(BaseValidator):
|
|
15
|
+
def __init__(self, template: str):
|
|
16
|
+
super().__init__(template)
|
|
17
|
+
self._on_validate_staging = self._validate_staging
|
|
18
|
+
|
|
19
|
+
@staticmethod
|
|
20
|
+
def _validate_staging(stg_info: StgInfo, sys_info: Dv2SystemInfo) -> None:
|
|
21
|
+
# -----------------
|
|
22
|
+
# hk
|
|
23
|
+
# -----------------
|
|
24
|
+
hk_count = len(stg_info.hk_keys)
|
|
25
|
+
stg_full_name = f"stg.{stg_info.stg_name}"
|
|
26
|
+
|
|
27
|
+
# so only exactly 2 or 3 columns allowed right now
|
|
28
|
+
if hk_count < 2: # own and at least one foreign
|
|
29
|
+
raise Exception(f"At least 2 hk columns expected in `{stg_full_name}` for the `link2fact` template")
|
|
30
|
+
if hk_count > 3:
|
|
31
|
+
raise Exception(f"{hk_count} hk columns in `{stg_full_name}` for the `link2fact` template?! Are you sure?")
|
|
32
|
+
if stg_info.degenerate_field is None:
|
|
33
|
+
raise Exception(f"Degenerate field is required for `{stg_full_name}` for the `link2fact` template")
|
|
34
|
+
|
|
35
|
+
# hk_key = (key_name, key_type)
|
|
36
|
+
hk_keys_copy = list(stg_info.hk_keys.keys()).copy()
|
|
37
|
+
is_own_hk_found = False
|
|
38
|
+
expected_own_hk_column_name = f"hk_{stg_info.stg_name}"
|
|
39
|
+
|
|
40
|
+
for hk_name in stg_info.hk_keys.keys():
|
|
41
|
+
if hk_name == expected_own_hk_column_name:
|
|
42
|
+
hk_keys_copy.remove(hk_name)
|
|
43
|
+
is_own_hk_found = True
|
|
44
|
+
else:
|
|
45
|
+
# check that `name` from `hk_[name]` is existing entity
|
|
46
|
+
for en in sys_info.entities_list:
|
|
47
|
+
if hk_name == f"hk_{en.entity_name}":
|
|
48
|
+
hk_keys_copy.remove(hk_name)
|
|
49
|
+
|
|
50
|
+
if not is_own_hk_found:
|
|
51
|
+
raise Exception(f"Column `{expected_own_hk_column_name}` has not been found in `{stg_full_name}`")
|
|
52
|
+
|
|
53
|
+
if len(hk_keys_copy) > 0:
|
|
54
|
+
raise Exception(f"There are no entities related to `{", ".join(hk_keys_copy)}` columns in the `{stg_full_name}`")
|
|
55
|
+
|
|
56
|
+
# -----------------
|
|
57
|
+
# BKs
|
|
58
|
+
# -----------------
|
|
59
|
+
# it should be a warning, not an error
|
|
60
|
+
if len(stg_info.bk_keys) > 0:
|
|
61
|
+
raise Exception("You dont need bk columns for the `link2fact` template")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class Link2FactSchemaGenerator(SchemaGenerator):
|
|
65
|
+
|
|
66
|
+
def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
|
|
67
|
+
self.dialect_handler = dialect_handler
|
|
68
|
+
self._validation_result = validation_result
|
|
69
|
+
|
|
70
|
+
def make_tables(self) -> dict[str, Table]:
|
|
71
|
+
return {
|
|
72
|
+
"link": self.make_link_table(),
|
|
73
|
+
"sat": self.make_sat_table(),
|
|
74
|
+
#"fact": self.make_fact_table(),
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
def make_link_table(self) -> Table:
|
|
78
|
+
entity_name = self._validation_result.entity_name
|
|
79
|
+
|
|
80
|
+
# Create link table
|
|
81
|
+
link_table = Table(entity_name, MetaData(), schema="link")
|
|
82
|
+
uks: list[str] = []
|
|
83
|
+
|
|
84
|
+
# HKs (own and FKs)
|
|
85
|
+
for hk_key in self._validation_result.hk_keys:
|
|
86
|
+
if hk_key[0] == f"hk_{entity_name}":
|
|
87
|
+
col = Column(hk_key[0], hk_key[1], primary_key=True)
|
|
88
|
+
else:
|
|
89
|
+
uks.append(hk_key[0])
|
|
90
|
+
col = Column(hk_key[0], hk_key[1], nullable=False)
|
|
91
|
+
link_table.append_column(col)
|
|
92
|
+
dg_key = self._validation_result.degenerate_field
|
|
93
|
+
link_table.append_column(Column(dg_key[0], dg_key[1], nullable=False))
|
|
94
|
+
uks.append(dg_key[0])
|
|
95
|
+
link_table.append_constraint(UniqueConstraint(*uks))
|
|
96
|
+
|
|
97
|
+
# LoadDate
|
|
98
|
+
load_date = modeling_metadata.loaddate
|
|
99
|
+
load_date_type = self._validation_result.system_column_types[load_date]
|
|
100
|
+
load_date_col = Column(load_date, load_date_type, nullable=False)
|
|
101
|
+
link_table.append_column(load_date_col)
|
|
102
|
+
|
|
103
|
+
# RecordSource
|
|
104
|
+
record_source = modeling_metadata.recordsource
|
|
105
|
+
record_source_type = self._validation_result.system_column_types[record_source]
|
|
106
|
+
record_source_col = Column(record_source, record_source_type, nullable=False)
|
|
107
|
+
link_table.append_column(record_source_col)
|
|
108
|
+
|
|
109
|
+
return link_table
|
|
110
|
+
|
|
111
|
+
def make_sat_table(self) -> Table:
|
|
112
|
+
entity_name = self._validation_result.entity_name
|
|
113
|
+
|
|
114
|
+
# Create sat table
|
|
115
|
+
sat_table = Table(entity_name, MetaData(), schema="sat")
|
|
116
|
+
|
|
117
|
+
# own HK
|
|
118
|
+
for hk_key in self._validation_result.hk_keys:
|
|
119
|
+
if hk_key[0] == f"hk_{entity_name}":
|
|
120
|
+
col = Column(hk_key[0], hk_key[1], primary_key=True)
|
|
121
|
+
sat_table.append_column(col)
|
|
122
|
+
# for transactional links
|
|
123
|
+
dg_key = self._validation_result.degenerate_field
|
|
124
|
+
if dg_key is not None:
|
|
125
|
+
sat_table.append_column(Column(dg_key[0], dg_key[1], nullable=False))
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# LoadDate
|
|
129
|
+
load_date = modeling_metadata.loaddate
|
|
130
|
+
load_date_type = self._validation_result.system_column_types[load_date]
|
|
131
|
+
load_date_col = Column(load_date, load_date_type, nullable=False)
|
|
132
|
+
sat_table.append_column(load_date_col)
|
|
133
|
+
|
|
134
|
+
# RecordSource
|
|
135
|
+
record_source = modeling_metadata.recordsource
|
|
136
|
+
record_source_type = self._validation_result.system_column_types[record_source]
|
|
137
|
+
record_source_col = Column(record_source, record_source_type, nullable=False)
|
|
138
|
+
sat_table.append_column(record_source_col)
|
|
139
|
+
|
|
140
|
+
for (name_, type_) in self._validation_result.business_column_types.items():
|
|
141
|
+
col = Column(name_, type_, nullable=True)
|
|
142
|
+
sat_table.append_column(col)
|
|
143
|
+
|
|
144
|
+
return sat_table
|
|
145
|
+
|
|
146
|
+
def make_fact_table(self) -> Table:
|
|
147
|
+
entity_name = self._validation_result.entity_name
|
|
148
|
+
|
|
149
|
+
# Create a fact table
|
|
150
|
+
fact_table = Table(entity_name, MetaData(), schema="fact")
|
|
151
|
+
|
|
152
|
+
# not own HKs only
|
|
153
|
+
for hk_key in self._validation_result.hk_keys:
|
|
154
|
+
if hk_key[0] != f"hk_{entity_name}":
|
|
155
|
+
col = Column(hk_key[0], hk_key[1], primary_key=True)
|
|
156
|
+
fact_table.append_column(col)
|
|
157
|
+
|
|
158
|
+
for (name_, type_) in self._validation_result.business_column_types.items():
|
|
159
|
+
col = Column(name_, type_, nullable=True)
|
|
160
|
+
fact_table.append_column(col)
|
|
161
|
+
|
|
162
|
+
return fact_table
|
|
163
|
+
|
|
164
|
+
def make_procedures(self, tables: dict[str, Table]
|
|
165
|
+
, entity_registration_date: datetime = datetime.now()) -> dict[str, Tuple[str, str, str]]:
|
|
166
|
+
procedures = {}
|
|
167
|
+
|
|
168
|
+
header = modeling_metadata.HEADER_TEMPLATE.format(
|
|
169
|
+
created_on=entity_registration_date,
|
|
170
|
+
updated_on=datetime.now(),
|
|
171
|
+
version=SANDWICH_VERSION,
|
|
172
|
+
entity_name=self._validation_result.entity_name
|
|
173
|
+
)
|
|
174
|
+
|
|
175
|
+
stg_proc_name = None
|
|
176
|
+
if self._validation_result.stg_schema == "proxy":
|
|
177
|
+
stg_proc_code, stg_proc_name, stg_call_stmt = self.dialect_handler.make_stg_materialization_proc(
|
|
178
|
+
entity_name=self._validation_result.entity_name,
|
|
179
|
+
header=header
|
|
180
|
+
)
|
|
181
|
+
procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)
|
|
182
|
+
|
|
183
|
+
link_table = tables["link"]
|
|
184
|
+
link_proc_code, link_proc_name, link_call_stmt = self.dialect_handler.make_link_proc(
|
|
185
|
+
link_table=link_table,
|
|
186
|
+
hk_keys=self._validation_result.hk_keys + [self._validation_result.degenerate_field],
|
|
187
|
+
header=header
|
|
188
|
+
)
|
|
189
|
+
procedures["link"] = (link_proc_code, link_proc_name, link_call_stmt)
|
|
190
|
+
|
|
191
|
+
sat_table = tables["sat"]
|
|
192
|
+
sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_scd0_sat_proc(
|
|
193
|
+
sat_table=sat_table,
|
|
194
|
+
header=header
|
|
195
|
+
)
|
|
196
|
+
procedures["sat"] = (sat_proc_code, sat_proc_name, sat_call_stmt)
|
|
197
|
+
|
|
198
|
+
# job procedure
|
|
199
|
+
job_proc_names = [] # order-sensitive
|
|
200
|
+
if self._validation_result.stg_schema == "proxy":
|
|
201
|
+
job_proc_names.append(stg_proc_name)
|
|
202
|
+
job_proc_names.extend([link_proc_name, sat_proc_name])
|
|
203
|
+
job_proc_code, job_proc_name, job_call_stmt = self.dialect_handler.make_job_proc(
|
|
204
|
+
entity_name=self._validation_result.entity_name,
|
|
205
|
+
proc_names=job_proc_names,
|
|
206
|
+
header=header
|
|
207
|
+
)
|
|
208
|
+
procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)
|
|
209
|
+
|
|
210
|
+
# drop procedure
|
|
211
|
+
drop_table_schemas = ["link", "sat"]
|
|
212
|
+
if self._validation_result.stg_schema == "proxy":
|
|
213
|
+
drop_table_schemas.append("stg")
|
|
214
|
+
drop_proc_names = [link_proc_name, sat_proc_name]
|
|
215
|
+
if self._validation_result.stg_schema == "proxy":
|
|
216
|
+
drop_proc_names.append(stg_proc_name)
|
|
217
|
+
drop_proc_code, drop_proc_name, drop_call_stmt = self.dialect_handler.make_drop_proc(
|
|
218
|
+
entity_name=self._validation_result.entity_name,
|
|
219
|
+
table_schemas=drop_table_schemas,
|
|
220
|
+
procedures=drop_proc_names,
|
|
221
|
+
header=header
|
|
222
|
+
)
|
|
223
|
+
procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)
|
|
224
|
+
|
|
225
|
+
return procedures
|
|
@@ -3,22 +3,20 @@ from typing import Iterator, Tuple
|
|
|
3
3
|
|
|
4
4
|
from sqlalchemy import Column, MetaData, Table, UniqueConstraint
|
|
5
5
|
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
from
|
|
6
|
+
from sandwich import SANDWICH_VERSION
|
|
7
|
+
from sandwich.dialects.base import DialectHandler
|
|
8
|
+
from sandwich.modeling import modeling_metadata, StgInfo, Dv2SystemInfo
|
|
9
9
|
|
|
10
|
-
from .base import
|
|
10
|
+
from .base import BaseValidator, SchemaGenerator, ValidationResult
|
|
11
11
|
|
|
12
12
|
|
|
13
|
-
class Scd2DimValidator(
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
Raises: Exception"""
|
|
19
|
-
if verbose:
|
|
20
|
-
raise Exception("verbose is not implemented yet")
|
|
13
|
+
class Scd2DimValidator(BaseValidator):
|
|
14
|
+
def __init__(self, template: str):
|
|
15
|
+
super().__init__(template)
|
|
16
|
+
self._on_validate_staging = self._validate_staging
|
|
21
17
|
|
|
18
|
+
@staticmethod
|
|
19
|
+
def _validate_staging(stg_info: StgInfo, _: Dv2SystemInfo) -> None:
|
|
22
20
|
# -----------------
|
|
23
21
|
# hk
|
|
24
22
|
# -----------------
|
|
@@ -47,40 +45,21 @@ class Scd2DimValidator(Validator):
|
|
|
47
45
|
|
|
48
46
|
system_column_names = stg_info.sys_columns.keys()
|
|
49
47
|
|
|
50
|
-
# universal check - all dv2 raw objects should be auditable
|
|
51
|
-
for required_col in modeling_metadata.required_columns:
|
|
52
|
-
if required_col not in system_column_names:
|
|
53
|
-
raise Exception(f"{required_col} column is required")
|
|
54
|
-
|
|
55
|
-
# scd2dim specific validations
|
|
56
48
|
if modeling_metadata.hashdiff not in system_column_names:
|
|
57
49
|
raise Exception(f"{modeling_metadata.hashdiff} column is required for scd2dim validation")
|
|
58
50
|
if modeling_metadata.is_available not in system_column_names:
|
|
59
51
|
raise Exception(f"{modeling_metadata.is_available} column is required for scd2dim validation")
|
|
60
52
|
|
|
61
|
-
return ValidationResult(
|
|
62
|
-
stg_schema=stg_info.stg_schema,
|
|
63
|
-
entity_name=stg_info.stg_name,
|
|
64
|
-
bk_keys=[(nm, tp) for nm, tp in stg_info.bk_keys.items()],
|
|
65
|
-
hk_key=hk_key,
|
|
66
|
-
business_column_types=stg_info.bus_columns,
|
|
67
|
-
system_column_types=stg_info.sys_columns,
|
|
68
|
-
)
|
|
69
|
-
|
|
70
53
|
class Scd2DimSchemaGenerator(SchemaGenerator):
|
|
71
54
|
|
|
72
55
|
def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
|
|
73
56
|
self.dialect_handler = dialect_handler
|
|
74
57
|
self._validation_result = validation_result
|
|
75
58
|
|
|
76
|
-
@property
|
|
77
|
-
def entity_info(self) -> ValidationResult:
|
|
78
|
-
return self._validation_result
|
|
79
|
-
|
|
80
59
|
def make_tables(self) -> dict[str, Table]:
|
|
81
60
|
entity_name = self._validation_result.entity_name
|
|
82
61
|
bk_keys = self._validation_result.bk_keys
|
|
83
|
-
hk_key = self._validation_result.
|
|
62
|
+
hk_key = self._validation_result.hk_keys[0]
|
|
84
63
|
business_column_types = self._validation_result.business_column_types
|
|
85
64
|
system_column_types = self._validation_result.system_column_types
|
|
86
65
|
|
|
@@ -199,9 +178,9 @@ class Scd2DimSchemaGenerator(SchemaGenerator):
|
|
|
199
178
|
|
|
200
179
|
# Generate sat procedure
|
|
201
180
|
sat_table = tables["sat"]
|
|
202
|
-
sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.
|
|
181
|
+
sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_scd2_sat_proc(
|
|
203
182
|
sat_table=sat_table,
|
|
204
|
-
hk_name=self._validation_result.
|
|
183
|
+
hk_name=self._validation_result.hk_keys[0][0],
|
|
205
184
|
hashdiff_col=modeling_metadata.hashdiff,
|
|
206
185
|
is_available_col=modeling_metadata.is_available,
|
|
207
186
|
loaddate_col=modeling_metadata.loaddate,
|
|
@@ -212,7 +191,7 @@ class Scd2DimSchemaGenerator(SchemaGenerator):
|
|
|
212
191
|
|
|
213
192
|
# Generate dim procedure
|
|
214
193
|
dim_table = tables["dim"]
|
|
215
|
-
dim_proc_code, dim_proc_name, dim_call_stmt = self.dialect_handler.
|
|
194
|
+
dim_proc_code, dim_proc_name, dim_call_stmt = self.dialect_handler.make_scd2_dim_proc(
|
|
216
195
|
dim_table=dim_table,
|
|
217
196
|
bk_keys=self._validation_result.bk_keys,
|
|
218
197
|
header=header
|
|
@@ -220,25 +199,28 @@ class Scd2DimSchemaGenerator(SchemaGenerator):
|
|
|
220
199
|
procedures["dim"] = (dim_proc_code, dim_proc_name, dim_call_stmt)
|
|
221
200
|
|
|
222
201
|
# Generate job procedure
|
|
202
|
+
job_proc_names = [] # order-sensitive
|
|
203
|
+
if self._validation_result.stg_schema == "proxy":
|
|
204
|
+
job_proc_names.append(stg_proc_name)
|
|
205
|
+
job_proc_names.extend([hub_proc_name, sat_proc_name, dim_proc_name])
|
|
223
206
|
job_proc_code, job_proc_name, job_call_stmt = self.dialect_handler.make_job_proc(
|
|
224
207
|
entity_name=self._validation_result.entity_name,
|
|
225
|
-
|
|
226
|
-
sat_proc_name=sat_proc_name,
|
|
227
|
-
dim_proc_name=dim_proc_name,
|
|
228
|
-
stg_proc_name=stg_proc_name,
|
|
208
|
+
proc_names=job_proc_names,
|
|
229
209
|
header=header
|
|
230
210
|
)
|
|
231
211
|
procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)
|
|
232
212
|
|
|
233
213
|
# Generate drop procedure
|
|
214
|
+
drop_table_schemas = ["hub", "sat", "dim"]
|
|
215
|
+
if self._validation_result.stg_schema == "proxy":
|
|
216
|
+
drop_table_schemas.append("stg")
|
|
217
|
+
drop_proc_names = [job_proc_name, hub_proc_name, sat_proc_name, dim_proc_name]
|
|
218
|
+
if self._validation_result.stg_schema == "proxy":
|
|
219
|
+
drop_proc_names.append(stg_proc_name)
|
|
234
220
|
drop_proc_code, drop_proc_name, drop_call_stmt = self.dialect_handler.make_drop_proc(
|
|
235
221
|
entity_name=self._validation_result.entity_name,
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
stg_proc_name=stg_proc_name,
|
|
239
|
-
hub_proc_name=hub_proc_name,
|
|
240
|
-
sat_proc_name=sat_proc_name,
|
|
241
|
-
dim_proc_name=dim_proc_name,
|
|
222
|
+
table_schemas=drop_table_schemas,
|
|
223
|
+
procedures=drop_proc_names,
|
|
242
224
|
header=header
|
|
243
225
|
)
|
|
244
226
|
procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sandwich
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: DataVault 2.0 code gen
|
|
5
5
|
Keywords: DWH,Data Vault 2.0
|
|
6
6
|
Author: Andrey Morozov
|
|
@@ -124,14 +124,14 @@ select cast(31 as bigint) [bk_id]
|
|
|
124
124
|
| | | | DateTo |
|
|
125
125
|
|
|
126
126
|
### link2fact profile columns mapping:
|
|
127
|
-
| stg | link
|
|
128
|
-
|
|
129
|
-
| HKs... | (uk)(fk)
|
|
130
|
-
| hk_`[entity_name]` | (pk)hk_`[entity_name]`
|
|
131
|
-
| degenerate_field
|
|
132
|
-
| LoadDate | LoadDate
|
|
133
|
-
| RecordSource | RecordSource
|
|
134
|
-
| FLDs... |
|
|
127
|
+
| stg | link | sat | fact |
|
|
128
|
+
|--------------------|--------------------------------|----------------------------|------|
|
|
129
|
+
| HKs... | (uk)(fk)hk_`other_entity_name` | | |
|
|
130
|
+
| hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
|
|
131
|
+
| <degenerate_field> | (uk)<degenerate_field> | <degenerate_field> | |
|
|
132
|
+
| LoadDate | LoadDate | LoadDate | |
|
|
133
|
+
| RecordSource | RecordSource | RecordSource | |
|
|
134
|
+
| FLDs... | | FLDs... | |
|
|
135
135
|
|
|
136
136
|
|
|
137
137
|
### Schemas:
|
|
@@ -147,11 +147,13 @@ select cast(31 as bigint) [bk_id]
|
|
|
147
147
|
* `proxy` - source data for a materialized staging area (meant for wrapping external data sources as SQL views)
|
|
148
148
|
|
|
149
149
|
### DV2-related schemas layering
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
|
153
|
-
|
|
154
|
-
|
|
|
150
|
+
data -> ELT -> report
|
|
151
|
+
|
|
152
|
+
| LoB* data | staging (E) | raw vault (L) | business vault (T) | information vault |
|
|
153
|
+
|-----------|-------------|---------------|--------------------|-------------------|
|
|
154
|
+
| | stg | hub | sal | dim |
|
|
155
|
+
| | proxy | sat | | fact |
|
|
156
|
+
| | pool | link | | |
|
|
155
157
|
_* Line of Business applications_
|
|
156
158
|
|
|
157
159
|
### Usage diagram
|
|
@@ -174,4 +176,4 @@ _* Line of Business applications_
|
|
|
174
176
|
+ +--------+ +---------------+
|
|
175
177
|
+
|
|
176
178
|
|
|
177
|
-
```
|
|
179
|
+
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
sandwich/__init__.py,sha256=DiQSmvml9OXujAYHILR4jz8UjoxbMvxFgRIlsdRza1E,80
|
|
2
|
+
sandwich/dialects/__init__.py,sha256=zQ4oigT3yqjZyl_IL_Tc-GmyoJar_Oqj_bGyiRVdSjg,415
|
|
3
|
+
sandwich/dialects/base.py,sha256=5wRh4T4e-SJbB-8eYtyQhd01HG-c0-x52tda7HFzSGs,3759
|
|
4
|
+
sandwich/dialects/ddl_mssql.py,sha256=VkwkJ373hDQNe2y9d9M3YJCdeCJmleUzk1hxvJjEhbA,3362
|
|
5
|
+
sandwich/dialects/ddl_postgres.py,sha256=Mdk2dAcSp8Nwc0p98nCnRtQ3vT09lH-cJVa6SdLLuwg,2995
|
|
6
|
+
sandwich/dialects/factory.py,sha256=-mpWGKp8NRmTFCXVlhbTsGhR0oAwOEdm-xnBolwWrQo,911
|
|
7
|
+
sandwich/dialects/mssql.py,sha256=laSPSfZR38aP3DBDHxv0YKwXC60kevVpyqjUmVu1xhk,10226
|
|
8
|
+
sandwich/dialects/postgres.py,sha256=8wVuqKqVxT0Mch6opK81qAyP-UaVDz5OvCuC5LEbe-U,4030
|
|
9
|
+
sandwich/dialects/utils.py,sha256=mdFp5jkJquhnYB3wiRjRunsGcHtmpcbjBzzgONfbM7w,5774
|
|
10
|
+
sandwich/dwh/__init__.py,sha256=e1Pev1TtNBO0rikHY8GrkfjcSdu8ow7xadlqvpiIHwE,4157
|
|
11
|
+
sandwich/errors.py,sha256=kIJmYbUf9wOnshJbFHwhqxZ3qEEdVtOy5Dcb2bSdnAk,872
|
|
12
|
+
sandwich/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
sandwich/modeling/__init__.py,sha256=owIplBqXoHBoiQ2D_zmckaFsXcXEl4kgT3HwBqPuLUU,4091
|
|
14
|
+
sandwich/modeling/strategies/__init__.py,sha256=VePgBfvPCLl4_GJIWlbN-VbWjB7iD0ztPjMYuzIsi3U,403
|
|
15
|
+
sandwich/modeling/strategies/base.py,sha256=_5mWrQEmyx5BESIxaZ6eC7GZjCoFQNJ9p0gXqDfGBVA,3583
|
|
16
|
+
sandwich/modeling/strategies/factory.py,sha256=Lc1MugJ-3g335lcXb3H4mVJgD0toDTYR-dmsK6-n1Yw,1598
|
|
17
|
+
sandwich/modeling/strategies/link2fact.py,sha256=KPaccOWqMmpiXA6U9Pb7IAhSQAl-xN3QaiX4jVcWRIY,9229
|
|
18
|
+
sandwich/modeling/strategies/scd2dim.py,sha256=qHlMj800lk6W7ipT9LsM_sND-EqYVttE5qIqEGGHmk8,10194
|
|
19
|
+
sandwich/py.typed,sha256=70pF0eMpuZgOyb0zFSE07ugId_AoU5z6CpLlVfg3pik,34
|
|
20
|
+
sandwich-0.3.0.dist-info/WHEEL,sha256=YUH1mBqsx8Dh2cQG2rlcuRYUhJddG9iClegy4IgnHik,79
|
|
21
|
+
sandwich-0.3.0.dist-info/entry_points.txt,sha256=0GSrDEOq9Qo5CwxppoVq-HNcILK65PchNmI6J0ripq8,44
|
|
22
|
+
sandwich-0.3.0.dist-info/METADATA,sha256=KccsRqwgn-UPVEDAUkmfekTHaYw3SVRlJW-0RByVnAM,9127
|
|
23
|
+
sandwich-0.3.0.dist-info/RECORD,,
|
sandwich/dv2_helper.py
DELETED
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
from datetime import datetime
|
|
2
|
-
|
|
3
|
-
from sqlalchemy import Connection, Engine, MetaData, Table, select, text
|
|
4
|
-
|
|
5
|
-
from src.sandwich.dialects import DialectHandlerFactory
|
|
6
|
-
from src.sandwich.modeling import get_stg_info, infer_template
|
|
7
|
-
from src.sandwich.strategies import StrategyFactory
|
|
8
|
-
|
|
9
|
-
from . import errors as err
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class Dv2Helper:
|
|
13
|
-
|
|
14
|
-
def __init__(self, stg: Table, dialect: str = "mssql", template: str | None = None):
|
|
15
|
-
stg_info = get_stg_info(stg)
|
|
16
|
-
if template is None:
|
|
17
|
-
template = infer_template(stg_info)
|
|
18
|
-
if template not in ("scd2dim", "link2fact"):
|
|
19
|
-
raise ValueError(
|
|
20
|
-
f"Template '{template}' is not supported. Supported templates: scd2dim, link2fact"
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
# legacy fields
|
|
24
|
-
self.stg_table = stg
|
|
25
|
-
self.entity_name = stg.name
|
|
26
|
-
self.template = template
|
|
27
|
-
|
|
28
|
-
self.dialect = dialect
|
|
29
|
-
self.dialect_handler = DialectHandlerFactory.create_handler(dialect)
|
|
30
|
-
self.validator = StrategyFactory.create_validator(template)
|
|
31
|
-
self.validation_result = self.validator.validate_staging(stg_info)
|
|
32
|
-
self.schema_generator = StrategyFactory.create_generator(template,
|
|
33
|
-
self.dialect_handler,
|
|
34
|
-
self.validation_result)
|
|
35
|
-
|
|
36
|
-
# Convenience properties (for backward compatibility if needed)
|
|
37
|
-
self.bk_keys = self.validation_result.bk_keys
|
|
38
|
-
self.hk_key = self.validation_result.hk_key
|
|
39
|
-
self.business_column_types = self.validation_result.business_column_types
|
|
40
|
-
self.system_column_types = self.validation_result.system_column_types
|
|
41
|
-
|
|
42
|
-
def call_register_entity(self, conn: Engine | Connection) -> datetime:
|
|
43
|
-
if self.dialect == "mssql":
|
|
44
|
-
call_stmt = "exec core.[register_entity] :entity_name, :template"
|
|
45
|
-
elif self.dialect == "postgres":
|
|
46
|
-
call_stmt = "call core.register_entity (:entity_name, :template)"
|
|
47
|
-
else:
|
|
48
|
-
raise err.Dv2NotYetImplementedForDialectError(self.dialect)
|
|
49
|
-
|
|
50
|
-
conn.execute(
|
|
51
|
-
text(call_stmt),
|
|
52
|
-
{
|
|
53
|
-
"entity_name": self.entity_name,
|
|
54
|
-
"template": self.template,
|
|
55
|
-
})
|
|
56
|
-
|
|
57
|
-
entities = Table("entities", MetaData(), schema="core", autoload_with=conn)
|
|
58
|
-
stmt = select(entities.c.created).where(self.entity_name == entities.c.entity_name)
|
|
59
|
-
return conn.execute(stmt).scalar_one()
|
|
60
|
-
|
|
61
|
-
# def call_job_proc(self, conn: Engine | Connection, parent_execution_id: int = -1) -> None:
|
|
62
|
-
# job_proc_name = self.schema_generator.get_job_proc_name(self.entity_name, self.dialect)
|
|
63
|
-
#
|
|
64
|
-
# if self.dialect == "mssql":
|
|
65
|
-
# call_stmt = f"exec {job_proc_name} :parent_executionID"
|
|
66
|
-
# else:
|
|
67
|
-
# raise err.Dv2NotYetImplementedForDialectError(self.dialect)
|
|
68
|
-
#
|
|
69
|
-
# conn.execute(text(call_stmt), {"parent_executionID": parent_execution_id})
|
|
70
|
-
|
|
71
|
-
def generate_schema(self, conn: Engine | Connection, verbose: bool = False) -> None:
|
|
72
|
-
registered_on = self.call_register_entity(conn)
|
|
73
|
-
if verbose:
|
|
74
|
-
print(f"[ok] Registered `{self.entity_name}` for `{self.template}`")
|
|
75
|
-
|
|
76
|
-
tables = self.schema_generator.make_tables()
|
|
77
|
-
for table_type, table in tables.items():
|
|
78
|
-
if table is not None:
|
|
79
|
-
table.create(conn, checkfirst=True)
|
|
80
|
-
if verbose:
|
|
81
|
-
print(f"[ok] Created table [{table.schema}].[{table.name}]")
|
|
82
|
-
|
|
83
|
-
procedures = self.schema_generator.make_procedures(tables, registered_on)
|
|
84
|
-
for proc_type, (proc_code, proc_name, _) in procedures.items():
|
|
85
|
-
conn.execute(text(proc_code))
|
|
86
|
-
if verbose:
|
|
87
|
-
print(f"[ok] Created or altered {proc_name}")
|
|
88
|
-
|
|
89
|
-
@classmethod
|
|
90
|
-
def update_registered_entities(cls, conn: Engine | Connection, dialect: str = "mssql",
|
|
91
|
-
verbose: bool = False):
|
|
92
|
-
metadata = MetaData()
|
|
93
|
-
entities = Table("entities", metadata, schema="core", autoload_with=conn)
|
|
94
|
-
select_result = conn.execute(entities.select().where(~entities.c.is_deleted))
|
|
95
|
-
for row in select_result.mappings().all():
|
|
96
|
-
stg = Table(row["entity_name"], metadata, schema="stg", autoload_with=conn)
|
|
97
|
-
dv2 = cls(stg, dialect=dialect, template=row["template"])
|
|
98
|
-
dv2.generate_schema(conn, verbose=verbose)
|
sandwich/strategies/base.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
from abc import ABC, abstractmethod
|
|
2
|
-
from datetime import datetime
|
|
3
|
-
from typing import Any, Tuple
|
|
4
|
-
|
|
5
|
-
from sqlalchemy import Table
|
|
6
|
-
|
|
7
|
-
from src.sandwich.modeling import StgInfo
|
|
8
|
-
|
|
9
|
-
class ValidationResult:
|
|
10
|
-
def __init__(self, stg_schema: str, entity_name: str
|
|
11
|
-
, bk_keys: list[Tuple[str, Any]]
|
|
12
|
-
, hk_key: Tuple[str, Any]
|
|
13
|
-
, business_column_types: dict[str, Any]
|
|
14
|
-
, system_column_types: dict[str, Any]):
|
|
15
|
-
self.stg_schema = stg_schema
|
|
16
|
-
self.entity_name = entity_name
|
|
17
|
-
self.bk_keys = bk_keys
|
|
18
|
-
self.hk_key = hk_key
|
|
19
|
-
self.business_column_types = business_column_types
|
|
20
|
-
self.system_column_types = system_column_types
|
|
21
|
-
|
|
22
|
-
class Validator(ABC):
|
|
23
|
-
@abstractmethod
|
|
24
|
-
def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> ValidationResult:
|
|
25
|
-
pass
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class SchemaGenerator(ABC):
|
|
29
|
-
@property
|
|
30
|
-
@abstractmethod
|
|
31
|
-
def entity_info(self) -> ValidationResult: ...
|
|
32
|
-
|
|
33
|
-
@abstractmethod
|
|
34
|
-
def make_tables(self) -> dict[str, Table]:
|
|
35
|
-
pass
|
|
36
|
-
|
|
37
|
-
@abstractmethod
|
|
38
|
-
def make_procedures(
|
|
39
|
-
self,
|
|
40
|
-
tables: dict[str, Table],
|
|
41
|
-
entity_registration_date: datetime = datetime.now()
|
|
42
|
-
) -> dict[str, Tuple[str, str, str]]:
|
|
43
|
-
pass
|
|
44
|
-
|
sandwich/strategies/link2fact.py
DELETED
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
"""Link to Fact strategy implementations."""
|
|
2
|
-
from datetime import datetime
|
|
3
|
-
from typing import Tuple
|
|
4
|
-
|
|
5
|
-
from sqlalchemy import Table
|
|
6
|
-
|
|
7
|
-
from src.sandwich.dialects.base import DialectHandler
|
|
8
|
-
|
|
9
|
-
from .base import Validator, SchemaGenerator, ValidationResult
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class Link2FactValidator(Validator):
|
|
13
|
-
|
|
14
|
-
def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> dict:
|
|
15
|
-
"""Validate staging table for link2fact mode."""
|
|
16
|
-
if verbose:
|
|
17
|
-
raise Exception("verbose is not implemented yet")
|
|
18
|
-
|
|
19
|
-
# TODO: Implement link2fact specific validation logic
|
|
20
|
-
# This will likely be different from scd2dim validation
|
|
21
|
-
# For example: checking for link keys, fact columns, etc.
|
|
22
|
-
|
|
23
|
-
bk_keys = []
|
|
24
|
-
hk_key = None
|
|
25
|
-
business_column_types = {}
|
|
26
|
-
system_column_types = {}
|
|
27
|
-
link_keys = [] # New concept for link2fact
|
|
28
|
-
fact_columns = [] # New concept for link2fact
|
|
29
|
-
|
|
30
|
-
# Placeholder validation logic
|
|
31
|
-
for col in stg_table.columns.values():
|
|
32
|
-
# TODO: Implement column classification for link2fact mode
|
|
33
|
-
pass
|
|
34
|
-
|
|
35
|
-
return {
|
|
36
|
-
"stg_schema": stg_table.schema,
|
|
37
|
-
"entity_name": stg_table.name,
|
|
38
|
-
"bk_keys": bk_keys,
|
|
39
|
-
"hk_key": hk_key,
|
|
40
|
-
"business_column_types": business_column_types,
|
|
41
|
-
"system_column_types": system_column_types,
|
|
42
|
-
"link_keys": link_keys,
|
|
43
|
-
"fact_columns": fact_columns,
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
class Link2FactSchemaGenerator(SchemaGenerator):
|
|
48
|
-
|
|
49
|
-
def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
|
|
50
|
-
self.dialect_handler = dialect_handler
|
|
51
|
-
self._validation_result = validation_result
|
|
52
|
-
|
|
53
|
-
@property
|
|
54
|
-
def entity_info(self) -> ValidationResult:
|
|
55
|
-
return self._validation_result
|
|
56
|
-
|
|
57
|
-
def make_tables(self) -> dict[str, Table]:
|
|
58
|
-
"""Create link and fact tables for link2fact mode."""
|
|
59
|
-
# TODO: Implement link2fact table creation
|
|
60
|
-
# This will create different table structures than scd2dim
|
|
61
|
-
# For example: link table, fact table (instead of hub/sat/dim)
|
|
62
|
-
|
|
63
|
-
entity_name = self._validation_result.entity_name
|
|
64
|
-
|
|
65
|
-
# Placeholder - actual implementation needed
|
|
66
|
-
link_table: Table | None = None
|
|
67
|
-
fact_table: Table | None = None
|
|
68
|
-
|
|
69
|
-
return {
|
|
70
|
-
"link": link_table,
|
|
71
|
-
"fact": fact_table,
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
def make_procedures(
|
|
75
|
-
self,
|
|
76
|
-
tables: dict[str, Table],
|
|
77
|
-
entity_registration_date: datetime,
|
|
78
|
-
) -> dict[str, Tuple[str, str]]:
|
|
79
|
-
"""Generate procedures for link2fact mode."""
|
|
80
|
-
procedures = {}
|
|
81
|
-
|
|
82
|
-
# TODO: Implement link2fact procedure generation using dialect_handler
|
|
83
|
-
# This will generate different procedures than scd2dim
|
|
84
|
-
# For example: link population, fact population, aggregation logic, etc.
|
|
85
|
-
|
|
86
|
-
# When implementing, use self.dialect_handler methods to generate SQL
|
|
87
|
-
# Example:
|
|
88
|
-
# link_proc_code, link_proc_name = self.dialect_handler.make_link_proc(...)
|
|
89
|
-
# procedures["link"] = (link_proc_code, link_proc_name)
|
|
90
|
-
|
|
91
|
-
return procedures
|