sandwich 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sandwich/__init__.py +3 -0
- sandwich/dialects/__init__.py +12 -0
- sandwich/dialects/base.py +166 -0
- sandwich/dialects/ddl_mssql.py +148 -0
- sandwich/dialects/ddl_postgres.py +132 -0
- sandwich/dialects/factory.py +27 -0
- sandwich/dialects/mssql.py +271 -0
- sandwich/dialects/postgres.py +108 -0
- sandwich/dialects/utils.py +149 -0
- sandwich/dv2_helper.py +98 -0
- sandwich/errors.py +25 -0
- sandwich/main.py +0 -0
- sandwich/modeling/__init__.py +103 -0
- sandwich/strategies/__init__.py +15 -0
- sandwich/strategies/base.py +44 -0
- sandwich/strategies/factory.py +38 -0
- sandwich/strategies/link2fact.py +91 -0
- sandwich/strategies/scd2dim.py +246 -0
- {sandwich-0.2.0.dist-info → sandwich-0.2.2.dist-info}/METADATA +169 -136
- sandwich-0.2.2.dist-info/RECORD +23 -0
- sandwich-0.2.2.dist-info/WHEEL +4 -0
- sandwich-0.2.2.dist-info/entry_points.txt +3 -0
- sandwich-0.2.0.dist-info/RECORD +0 -5
- sandwich-0.2.0.dist-info/WHEEL +0 -4
- sandwich-0.2.0.dist-info/licenses/LICENSE +0 -9
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
"""MSSQL dialect handler for SQL code generation."""
|
|
2
|
+
from typing import Tuple
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import dialects, Table
|
|
5
|
+
|
|
6
|
+
from .base import DialectHandler
|
|
7
|
+
from .utils import get_columns_list
|
|
8
|
+
|
|
9
|
+
class MssqlDialectHandler(DialectHandler):
    """Dialect handler for Microsoft SQL Server.

    Generates T-SQL for staging, hub, satellite and dimension objects.
    Every ``make_*_proc`` method returns a 3-tuple of
    ``(procedure source, procedure name, call statement)``.
    """

    def get_boolean_type(self):
        """Return the SQLAlchemy type used for booleans on MSSQL (BIT)."""
        # MSSQL has no native BOOLEAN type; BIT is the conventional substitute.
        return dialects.mssql.BIT

    def get_proc_name_format(self, schema: str, operation: str, entity_name: str) -> str:
        """Get MSSQL procedure naming format: [schema].[operation_entity]."""
        return f"[{schema}].[{operation}_{entity_name}]"

    def apply_proc_template(self, proc_name: str, sql_body: str, header: str) -> str:
        """Wrap SQL body in MSSQL procedure template with error handling.

        The generated procedure logs start/end through core.LogExecution
        into core.ExecutionLog; on failure it records the error in
        core.ErrorLog, links it to the execution row, then re-raises via
        RAISERROR so the caller still sees the failure.
        """
        # language=sql
        proc_template_sql = f"""
{header}
create or alter proc {proc_name} (@parent_executionID bigint = null) as
begin
    set nocount on;

    declare @executionID bigint;
    exec core.LogExecution @@PROCID, null, @executionID out, @parent_executionID;

    begin try
        {sql_body}
        exec core.LogExecution @@PROCID, @executionID, @executionID out;
    end try
    begin catch
        declare @err table (ErrorID int);
        declare @ErrorMessage NVARCHAR(4000);
        declare @ErrorSeverity INT;
        declare @ErrorState INT;

        set @ErrorMessage = ERROR_MESSAGE();
        set @ErrorSeverity = ERROR_SEVERITY();
        set @ErrorState = ERROR_STATE();

        insert into core.ErrorLog
        output inserted.ErrorID into @err
        values (
            SUSER_SNAME(),
            ERROR_NUMBER(),
            @ErrorState,
            @ErrorSeverity,
            ERROR_LINE(),
            ERROR_PROCEDURE(),
            @ErrorMessage,
            getdate()
        );

        update [core].[ExecutionLog]
        set [errorID] = (select ErrorID from @err)
          , [end_timestamp] = getdate()
        where [executionID] = @executionID;

        RAISERROR (
            @ErrorMessage,
            @ErrorSeverity,
            @ErrorState
        );
    end catch
end
"""
        return proc_template_sql

    def make_stg_materialization_proc(self, entity_name: str, header: str) -> Tuple[str, str, str]:
        """Generate a proc that re-materializes stg.<entity> from proxy.<entity>."""
        # "Populate_stg" is a plain literal (was a pointless f-string with no placeholder).
        proc_name = self.get_proc_name_format("elt", "Populate_stg", entity_name)

        # language=sql
        proc_body = f"""
        if object_id('stg.{entity_name}') is not null drop table stg.{entity_name};
        select *
        into stg.{entity_name}
        from proxy.{entity_name};
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_hub_proc(self, hub_table: Table, bk_keys: list, header: str) -> Tuple[str, str, str]:
        """Generate the hub population proc: insert business keys not yet in the hub."""
        proc_name = self.get_proc_name_format("elt", f"Populate_{hub_table.schema}", hub_table.name)
        where_fields_list_str = " and ".join([f"hub.[{bk[0]}] = stg.[{bk[0]}]" for bk in bk_keys])
        columns_list = get_columns_list(hub_table)

        # language=sql
        proc_body = f"""
        insert into [{hub_table.schema}].[{hub_table.name}]
            ({columns_list})
        select distinct {get_columns_list(hub_table, alias="stg")}
        from stg.[{hub_table.name}] as stg
        where not exists (
            select *
            from [{hub_table.schema}].[{hub_table.name}] as hub
            where {where_fields_list_str}
        );
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_sat_proc(
            self,
            sat_table: Table,
            hk_name: str,
            hashdiff_col: str,
            is_available_col: str,
            loaddate_col: str,
            stg_schema: str,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate the satellite population proc.

        Inserts rows from staging that are new or changed (hashdiff differs
        from the latest available satellite row), and "closes" hash keys
        that disappeared from staging by inserting an IsAvailable=0 copy of
        their latest satellite row.
        """
        proc_name = self.get_proc_name_format("elt", f"Populate_{sat_table.schema}", sat_table.name)
        columns_list = get_columns_list(sat_table)

        def smart_replace(column_name: str) -> str:
            # LoadDate/IsAvailable get computed values in the "row disappeared"
            # branch; every other column is copied from the latest sat row.
            if column_name == "LoadDate":
                result = "sysdatetime() as [LoadDate]"
            elif column_name == "IsAvailable":
                result = "cast(0 as bit) as [IsAvailable]"
            else:
                result = f"sat.[{column_name}]"
            return result

        select_columns_list = ", ".join([smart_replace(col.name) for col in sat_table.columns.values()])

        if stg_schema == "proxy":
            # stg.<name> is already a physical table (materialized by the stg proc).
            stg_table_name = f"stg.[{sat_table.name}]"
            materialization_stmt = ""
        else:
            # Snapshot staging into a temp table first so the source is read only once.
            stg_table_name = "#materialized"
            materialization_stmt = f"""
        select distinct {columns_list}
        into #materialized
        -- drop table #materialized
        from stg.[{sat_table.name}];
        """

        # language=sql
        proc_body = f"""{materialization_stmt}
        with ranked_history as
        (
            select {columns_list}
                 , row_number() over (partition by [{hk_name}] order by [{loaddate_col}] desc) [DescRank]
            from [{sat_table.schema}].[{sat_table.name}]
        )
        insert into [{sat_table.schema}].[{sat_table.name}]
            ({columns_list})
        select {get_columns_list(sat_table, alias="stg")}
        from {stg_table_name} stg
        where not exists (
            select *
            from ranked_history sat
            where sat.[DescRank] = 1
              and stg.[{hk_name}] = sat.[{hk_name}]
              and stg.[{hashdiff_col}] = sat.[{hashdiff_col}]
              and sat.[{is_available_col}] = 1
        )

        union all

        select {select_columns_list}
        from ranked_history sat
        where not exists (
            select *
            from {stg_table_name} stg
            where stg.[{hk_name}] = sat.[{hk_name}]
        )
          and sat.[DescRank] = 1
          and sat.[{is_available_col}] = 1;
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_dim_scd2_proc(
            self,
            dim_table: Table,
            bk_keys: list,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate the proc that fully rebuilds the SCD2 dimension from its satellite."""
        proc_name = self.get_proc_name_format("elt", f"Recalculate_{dim_table.schema}", dim_table.name)
        columns_list = get_columns_list(dim_table)
        # Window-function partition key, computed once up front
        # (was a parameter-less lambda re-evaluated on every use).
        pk_keys = ", ".join(f"sat.[{bk[0]}]" for bk in bk_keys)

        def smart_replace(column_name: str) -> str:
            # SCD2 columns are derived with window functions over LoadDate.
            if column_name == "DateFrom":
                result = "sat.LoadDate as [DateFrom]"
            elif column_name == "DateTo":
                # Next version's LoadDate minus 1 microsecond; open rows get the sentinel max date.
                result = f"lead(dateadd(microsecond, -1, sat.LoadDate), 1, '9999-12-31 23:59:59.9999999') over (partition by {pk_keys} order by sat.LoadDate) [DateTo]"
            elif column_name == "IsCurrent":
                result = f"iif(lead(sat.LoadDate) over (partition by {pk_keys} order by sat.LoadDate) is null, 1, 0) [IsCurrent]"
            else:
                result = f"sat.[{column_name}]"
            return result

        select_columns_list = "\n\t, ".join([smart_replace(col.name) for col in dim_table.columns.values()])

        # language=sql
        proc_body = f"""
        truncate table [{dim_table.schema}].[{dim_table.name}];

        insert into [{dim_table.schema}].[{dim_table.name}]
            ({columns_list})
        select {select_columns_list}
        from sat.[{dim_table.name}] sat
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_job_proc(
            self,
            entity_name: str,
            hub_proc_name: str,
            sat_proc_name: str,
            dim_proc_name: str,
            stg_proc_name: str | None,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate MSSQL job orchestration procedure.

        Runs (optional stg) -> hub -> sat -> dim, forwarding @executionID
        so child runs are linked to this job in the execution log.
        """
        proc_name = f"[job].[Run_all_related_to_{entity_name}]"

        stg_call = f"    exec {stg_proc_name} @executionID;\n" if stg_proc_name else ""

        # language=sql
        proc_body = f"""
        {stg_call} exec {hub_proc_name} @executionID;
        exec {sat_proc_name} @executionID;
        exec {dim_proc_name} @executionID;
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_drop_proc(
            self,
            entity_name: str,
            stg_schema: str,
            job_proc_name: str,
            stg_proc_name: str | None,
            hub_proc_name: str,
            sat_proc_name: str,
            dim_proc_name: str,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate MSSQL cleanup/drop procedure.

        Drops every table/procedure generated for the entity and
        soft-deletes its row in core.entities.
        """
        proc_name = f"[meta].[Drop_all_related_to_{entity_name}]"

        # The stg table/proc only exist when staging was materialized from a proxy.
        stg_drops = f"""
        drop table if exists [stg].[{entity_name}];
        drop procedure if exists {stg_proc_name};
        """ if stg_schema == "proxy" else ""

        # language=sql
        proc_body = f"""{stg_drops}
        drop table if exists [dim].[{entity_name}];
        drop procedure if exists {dim_proc_name};
        drop table if exists [sat].[{entity_name}];
        drop procedure if exists {sat_proc_name};
        drop table if exists [hub].[{entity_name}];
        drop procedure if exists {hub_proc_name};
        drop procedure if exists {job_proc_name};

        update core.[entities]
        set [deleted] = sysdatetime()
          , [is_deleted] = 1
        where [entity_name] = '{entity_name}'
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Postgres dialect handler for SQL code generation."""
|
|
2
|
+
from typing import Tuple
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import dialects, Table
|
|
5
|
+
|
|
6
|
+
from src.sandwich.dialects.base import DialectHandler
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class PostgresDialectHandler(DialectHandler):
    """Dialect handler for PostgreSQL.

    NOTE: This is a stub implementation. All methods need to be implemented
    based on PostgreSQL syntax and conventions.

    All ``make_*_proc`` methods are annotated to return
    ``Tuple[str, str, str]`` — (proc source, proc name, call statement) —
    matching ``MssqlDialectHandler``; the stubs previously declared
    two-element tuples, which contradicted the consumer's unpacking.
    """

    def get_boolean_type(self):
        """Return the SQLAlchemy type used for booleans on PostgreSQL."""
        return dialects.postgresql.BOOLEAN

    def get_proc_name_format(self, schema: str, operation: str, entity_name: str) -> str:
        """Get Postgres procedure naming format."""
        # Postgres uses lowercase with underscores by convention
        operation_lower = operation.lower()
        return f"{schema}.{operation_lower}_{entity_name}"

    def apply_proc_template(self, proc_name: str, sql_body: str, header: str) -> str:
        """Wrap SQL body in Postgres procedure template with error handling."""
        # TODO: Implement Postgres procedure template
        # Postgres uses CREATE OR REPLACE PROCEDURE/FUNCTION with PL/pgSQL
        # Error handling uses EXCEPTION blocks
        # Logging integration needed
        raise NotImplementedError("Postgres procedure template not yet implemented")

    def make_stg_materialization_proc(
            self,
            entity_name: str,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate Postgres staging materialization procedure."""
        # TODO: Implement using CREATE OR REPLACE and DROP/CREATE TABLE pattern
        raise NotImplementedError("Postgres staging materialization not yet implemented")

    def make_hub_proc(
            self,
            hub_table: Table,
            bk_keys: list,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate Postgres hub population procedure."""
        # TODO: Implement using INSERT...ON CONFLICT or NOT EXISTS pattern
        raise NotImplementedError("Postgres hub procedure not yet implemented")

    def make_sat_proc(
            self,
            sat_table: Table,
            hk_name: str,
            hashdiff_col: str,
            is_available_col: str,
            loaddate_col: str,
            stg_schema: str,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate Postgres satellite population procedure."""
        # TODO: Implement using CTE and window functions (similar to MSSQL but with Postgres syntax)
        # Use CURRENT_TIMESTAMP instead of SYSDATETIME()
        # Use BOOLEAN type instead of BIT
        raise NotImplementedError("Postgres satellite procedure not yet implemented")

    def make_dim_scd2_proc(
            self,
            dim_table: Table,
            bk_keys: list,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate Postgres dimension SCD2 recalculation procedure."""
        # TODO: Implement using TRUNCATE and INSERT with window functions
        # Use LAG/LEAD for SCD2 date calculations
        # Use INTERVAL for date arithmetic instead of DATEADD
        raise NotImplementedError("Postgres dimension procedure not yet implemented")

    def make_job_proc(
            self,
            entity_name: str,
            hub_proc_name: str,
            sat_proc_name: str,
            dim_proc_name: str,
            stg_proc_name: str | None,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate Postgres job orchestration procedure."""
        # TODO: Implement using CALL statements for other procedures
        # Pass execution_id through procedure parameters
        raise NotImplementedError("Postgres job procedure not yet implemented")

    def make_drop_proc(
            self,
            entity_name: str,
            stg_schema: str,
            job_proc_name: str,
            stg_proc_name: str | None,
            hub_proc_name: str,
            sat_proc_name: str,
            dim_proc_name: str,
            header: str
    ) -> Tuple[str, str, str]:
        """Generate Postgres cleanup/drop procedure."""
        # TODO: Implement using DROP IF EXISTS for tables and procedures
        # Update core.entities with deletion timestamp
        raise NotImplementedError("Postgres drop procedure not yet implemented")
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import Engine, Connection, Table, text, TextClause
|
|
5
|
+
|
|
6
|
+
from src.sandwich import SANDWICH_VERSION
|
|
7
|
+
from .. import errors as err
|
|
8
|
+
from src.sandwich.modeling import modeling_metadata
|
|
9
|
+
|
|
10
|
+
from . import ddl_mssql, ddl_postgres
|
|
11
|
+
|
|
12
|
+
def get_columns_list(table: Table, sep: str = ", ", alias: str | None = None) -> str:
    """Return the table's column names as a bracket-quoted, `sep`-joined list.

    When `alias` is given, each column is prefixed with "<alias>." so the
    list can be used in queries where the table carries that alias.
    """
    # `alias` was annotated plain `str` with a None default; also dropped the
    # redundant `alias or ''` (the prefix is already '' when alias is falsy).
    prefix = f"{alias}." if alias else ""
    return sep.join(f"{prefix}[{fld.name}]" for fld in table.columns.values())
|
|
15
|
+
|
|
16
|
+
def get_string_to_hash_ddl_mssql(columns_count: int) -> str:
    """Build DDL for [core].[StringToHash<N>], an N-argument SHA-1 hash function.

    Each argument is NULL-coalesced to '', trimmed, upper-cased and joined
    with ';' before hashing; the function returns a 40-char uppercase hex
    digest. Raises ValueError when columns_count is outside [2, 100].
    """
    if not 2 <= columns_count <= 100:
        raise ValueError("columns_count must be between 2 and 100")

    arg_numbers = range(1, columns_count + 1)
    params_list_str = ",\n\t".join(f"@StrValue{n} nvarchar(1000)" for n in arg_numbers)
    concat_list_str = ", ';',\n\t\t\t".join(
        f"rtrim(ltrim(isnull(@StrValue{n}, '')))" for n in arg_numbers)

    # language=sql
    ddl = f"""
    create or alter function [core].[StringToHash{columns_count}]
    (
        {params_list_str}
    ) returns char(40) as
    begin
        declare @result char(40);
        set @result = upper(convert(char(40), hashbytes('sha1',
            upper(concat(
                {concat_list_str}
            ))
        ), 2));
        return @result;
    end"""
    return ddl
|
|
40
|
+
|
|
41
|
+
def get_string_to_hash_ddl_postgres(columns_count: int) -> str:
    """Build DDL for core.string_to_hash<N>, an N-argument SHA-1 hash function.

    Each argument is NULL-coalesced to '', trimmed and upper-cased, the
    pieces joined with ';' and hashed with pgcrypto's digest(); the result
    is a 40-char uppercase hex digest. Raises ValueError when columns_count
    is outside [2, 100].
    """
    if not 2 <= columns_count <= 100:
        raise ValueError("columns_count must be between 2 and 100")

    arg_numbers = range(1, columns_count + 1)
    params_list_str = ",\n\t".join(f"p_str_value{n} text" for n in arg_numbers)
    concat_list_str = ", ';',\n\t\t\t".join(
        f"upper(trim(coalesce(p_str_value{n}, '')))" for n in arg_numbers)

    # language=sql
    return f"""
    create or replace function core.string_to_hash{columns_count}(
        {params_list_str}
    ) returns char(40)
    language plpgsql
    as $$
    declare
        result char(40);
    begin
        result :=
            upper(
                encode(
                    digest(
                        concat(
                            {concat_list_str}
                        ),
                        'sha1'
                    ),
                    'hex'
                )
            );
        return cast(result as char(40));
    end;
    $$;"""
|
|
75
|
+
|
|
76
|
+
def initialize_database(conn: Engine | Connection, dialect: str = "mssql",
                        str_to_hash_count: int = 66,
                        verbose: bool = False,
                        drop_entities_table: bool = False) -> None:
    """Create the core infrastructure for the chosen dialect.

    Builds and executes, in order: schemas, the core.entities registry table
    (optionally dropped first), the register_entity procedure, the family of
    StringToHash<N> hash functions, the execution/error log tables and the
    LogExecution procedure.

    Args:
        conn: Engine or Connection the DDL is executed on.
        dialect: "mssql" or "postgres"; anything else raises
            Dv2NotYetImplementedForDialectError.
        str_to_hash_count: upper bound for the generated hash functions.
        verbose: print each script name before executing it.
        drop_entities_table: drop core.entities before re-creating it.
    """
    # Scripts run in insertion order, so schema creation must stay first.
    init_scripts: dict[str, str] = {}
    header = modeling_metadata.HEADER_TEMPLATE.format(
        created_on=datetime.now(),
        updated_on=datetime.now(),
        version=SANDWICH_VERSION,
        entity_name="SYSTEM")

    if dialect == "mssql":
        init_scripts["create_schemas"] = ddl_mssql.create_schemas
        if drop_entities_table:
            init_scripts["drop_entities_table"] = "drop table if exists [core].[entities];"
        init_scripts["create_entities_table"] = ddl_mssql.create_entities_table
        init_scripts["create_proc_register_entity"] = header + ddl_mssql.create_proc_register_entity
        # The single-argument hash function has its own dedicated DDL constant.
        init_scripts["create_func_StringToHash1"] = header + ddl_mssql.create_func_StringToHash
        # NOTE(review): the range bound is exclusive, so StringToHash{str_to_hash_count}
        # itself is never created (2..count-1) — confirm this off-by-one is intended.
        for i in range(2, str_to_hash_count):
            init_scripts[f"create_func_StringToHash{i}"] = header + get_string_to_hash_ddl_mssql(i)
        init_scripts["create_table_ExecutionLog"] = ddl_mssql.create_table_ExecutionLog
        init_scripts["create_table_ErrorLog"] = ddl_mssql.create_table_ErrorLog
        init_scripts["create_proc_LogExecution"] = header + ddl_mssql.create_proc_LogExecution
    elif dialect == "postgres":
        # pgcrypto etc. must exist before the hash functions are created.
        init_scripts["create_extensions"] = ddl_postgres.create_extensions
        init_scripts["create_schemas"] = ddl_postgres.create_schemas
        if drop_entities_table:
            init_scripts["drop_entities_table"] = "drop table if exists core.entities"
        init_scripts["create_entities_table"] = ddl_postgres.create_entities_table
        # NOTE(review): unlike the mssql branch, no generated header is prepended here —
        # presumably deliberate, but worth confirming.
        init_scripts["create_proc_register_entity"] = ddl_postgres.create_proc_register_entity
        init_scripts["create_func_StringToHash1"] = ddl_postgres.create_func_StringToHash
        for i in range(2, str_to_hash_count):
            init_scripts[f"create_func_StringToHash{i}"] = get_string_to_hash_ddl_postgres(i)
        init_scripts["create_table_ExecutionLog"] = ddl_postgres.create_table_ExecutionLog
        init_scripts["create_table_ErrorLog"] = ddl_postgres.create_table_ErrorLog
        init_scripts["create_proc_LogExecution"] = ddl_postgres.create_proc_LogExecution
    else:
        raise err.Dv2NotYetImplementedForDialectError(dialect)

    for name, script in init_scripts.items():
        if verbose:
            print(f"[ok] Executing script: {name}")
        conn.execute(text(script))
|
|
119
|
+
|
|
120
|
+
def get_proc_definition_dml_mssql(proc_param_name: str) -> TextClause:
    """Build a query returning a stored procedure's T-SQL source.

    The returned clause carries one bind parameter (named `proc_param_name`)
    that must hold the fully qualified name in [schema].[proc] form.
    """
    sql = f"""
    SELECT sm.definition
    FROM sys.sql_modules sm
    JOIN sys.objects o ON sm.object_id = o.object_id
    JOIN sys.schemas s ON o.schema_id = s.schema_id
    WHERE o.type = 'P'
      AND '['+s.name+'].['+o.name+']' = :{proc_param_name}
    """
    return text(sql)
|
|
129
|
+
|
|
130
|
+
def parse_auto_generated_header(full_proc_text: str) -> dict[str, Any]:
    """Extract metadata from a procedure's auto-generated /* ... */ header.

    Scans until a line that is exactly "/*", then collects "Created on:" /
    "Updated on:" values until the closing "*/". Also reports
    "rows_in_header": the number of lines between the markers (0 when no
    header is found).
    """
    info: dict[str, Any] = {}
    in_header = False
    lines_seen = 0
    for raw_line in full_proc_text.splitlines():
        if not in_header:
            if raw_line.strip() == "/*":
                in_header = True
            continue
        lines_seen += 1
        trimmed = raw_line.lstrip()
        if trimmed.startswith("Created on"):
            info["created_on"] = raw_line.split(":", 1)[1].strip()
        elif trimmed.startswith("Updated on"):
            info["updated_on"] = raw_line.split(":", 1)[1].strip()
        elif raw_line.strip() == "*/":
            break
    # The closing "*/" line was counted too — exclude it (floor at 0).
    info["rows_in_header"] = max(lines_seen - 1, 0)
    return info
|
sandwich/dv2_helper.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
|
|
3
|
+
from sqlalchemy import Connection, Engine, MetaData, Table, select, text
|
|
4
|
+
|
|
5
|
+
from src.sandwich.dialects import DialectHandlerFactory
|
|
6
|
+
from src.sandwich.modeling import get_stg_info, infer_template
|
|
7
|
+
from src.sandwich.strategies import StrategyFactory
|
|
8
|
+
|
|
9
|
+
from . import errors as err
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Dv2Helper:
    """Validates a staging table and generates its Data Vault 2 schema
    (hub/sat/dim tables plus the populate/job/drop procedures).
    """

    def __init__(self, stg: Table, dialect: str = "mssql", template: str | None = None):
        """Inspect `stg`, pick/validate the template, and build the dialect
        handler and strategy objects used by `generate_schema`.

        Raises:
            ValueError: when the (inferred or given) template is unsupported.
        """
        stg_info = get_stg_info(stg)
        if template is None:
            template = infer_template(stg_info)
        if template not in ("scd2dim", "link2fact"):
            raise ValueError(
                f"Template '{template}' is not supported. Supported templates: scd2dim, link2fact"
            )

        # legacy fields
        self.stg_table = stg
        self.entity_name = stg.name
        self.template = template

        self.dialect = dialect
        self.dialect_handler = DialectHandlerFactory.create_handler(dialect)
        self.validator = StrategyFactory.create_validator(template)
        self.validation_result = self.validator.validate_staging(stg_info)
        self.schema_generator = StrategyFactory.create_generator(template,
                                                                 self.dialect_handler,
                                                                 self.validation_result)

        # Convenience properties (for backward compatibility if needed)
        self.bk_keys = self.validation_result.bk_keys
        self.hk_key = self.validation_result.hk_key
        self.business_column_types = self.validation_result.business_column_types
        self.system_column_types = self.validation_result.system_column_types

    def call_register_entity(self, conn: Engine | Connection) -> datetime:
        """Register the entity in core.entities and return its `created` timestamp."""
        if self.dialect == "mssql":
            call_stmt = "exec core.[register_entity] :entity_name, :template"
        elif self.dialect == "postgres":
            call_stmt = "call core.register_entity (:entity_name, :template)"
        else:
            raise err.Dv2NotYetImplementedForDialectError(self.dialect)

        conn.execute(
            text(call_stmt),
            {
                "entity_name": self.entity_name,
                "template": self.template,
            })

        entities = Table("entities", MetaData(), schema="core", autoload_with=conn)
        # Column on the left-hand side of the comparison: the original relied
        # on str.__eq__ returning NotImplemented and SQLAlchemy's reflected
        # operator kicking in — same SQL, but idiomatic this way.
        stmt = select(entities.c.created).where(entities.c.entity_name == self.entity_name)
        return conn.execute(stmt).scalar_one()

    def generate_schema(self, conn: Engine | Connection, verbose: bool = False) -> None:
        """Register the entity, then create its tables and procedures."""
        registered_on = self.call_register_entity(conn)
        if verbose:
            print(f"[ok] Registered `{self.entity_name}` for `{self.template}`")

        tables = self.schema_generator.make_tables()
        for table_type, table in tables.items():
            if table is not None:
                table.create(conn, checkfirst=True)
                if verbose:
                    print(f"[ok] Created table [{table.schema}].[{table.name}]")

        procedures = self.schema_generator.make_procedures(tables, registered_on)
        for proc_type, (proc_code, proc_name, _) in procedures.items():
            conn.execute(text(proc_code))
            if verbose:
                print(f"[ok] Created or altered {proc_name}")

    @classmethod
    def update_registered_entities(cls, conn: Engine | Connection, dialect: str = "mssql",
                                   verbose: bool = False):
        """Re-run schema generation for every non-deleted entity in core.entities."""
        metadata = MetaData()
        entities = Table("entities", metadata, schema="core", autoload_with=conn)
        select_result = conn.execute(entities.select().where(~entities.c.is_deleted))
        for row in select_result.mappings().all():
            stg = Table(row["entity_name"], metadata, schema="stg", autoload_with=conn)
            dv2 = cls(stg, dialect=dialect, template=row["template"])
            dv2.generate_schema(conn, verbose=verbose)
|
sandwich/errors.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
class _Dv2TemplatedError(Exception):
    """Override `_template` with a string using `{value}` placeholder and optionally `{field}` placeholder.

    Example: `_template = "User with {field}={value} not found"`
    """

    _template: str

    def __init__(self, value: Any, field: str | None = None):
        template = getattr(self, "_template", None)
        if not template:
            raise NotImplementedError("_template is not implemented")

        # Only pass `field` to format() when it was supplied, so templates
        # without a {field} placeholder keep working.
        format_args = {"value": value}
        if field:
            format_args["field"] = field
        super().__init__(template.format(**format_args))
|
|
21
|
+
|
|
22
|
+
class Dv2NotYetImplementedForDialectError(_Dv2TemplatedError):
    """Raised when a feature is requested for a SQL dialect that has no implementation yet."""
    # The previous __init__ override only forwarded to super().__init__(value);
    # the inherited constructor (value, field=None) already covers that call shape.
    _template = "Not yet implemented for '{value}' dialect"
|
sandwich/main.py
ADDED
|
File without changes
|