sandwich 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sandwich/__init__.py +3 -0
- sandwich/dialects/__init__.py +12 -0
- sandwich/dialects/base.py +136 -0
- sandwich/dialects/ddl_mssql.py +123 -0
- sandwich/dialects/ddl_postgres.py +114 -0
- sandwich/dialects/factory.py +27 -0
- sandwich/dialects/mssql.py +281 -0
- sandwich/dialects/postgres.py +107 -0
- sandwich/dialects/utils.py +147 -0
- sandwich/dwh/__init__.py +82 -0
- sandwich/errors.py +25 -0
- sandwich/main.py +0 -0
- sandwich/modeling/__init__.py +120 -0
- sandwich/modeling/strategies/__init__.py +15 -0
- sandwich/modeling/strategies/base.py +94 -0
- sandwich/modeling/strategies/factory.py +39 -0
- sandwich/modeling/strategies/link2fact.py +225 -0
- sandwich/modeling/strategies/scd2dim.py +228 -0
- {sandwich-0.2.1.dist-info → sandwich-0.3.0.dist-info}/METADATA +170 -155
- sandwich-0.3.0.dist-info/RECORD +23 -0
- sandwich-0.3.0.dist-info/WHEEL +4 -0
- sandwich-0.3.0.dist-info/entry_points.txt +3 -0
- sandwich-0.2.1.dist-info/RECORD +0 -5
- sandwich-0.2.1.dist-info/WHEEL +0 -4
- sandwich-0.2.1.dist-info/licenses/LICENSE +0 -9
sandwich/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Dialects package for SQL code generation."""
|
|
2
|
+
from src.sandwich.dialects.base import DialectHandler
|
|
3
|
+
from src.sandwich.dialects.factory import DialectHandlerFactory
|
|
4
|
+
from src.sandwich.dialects.mssql import MssqlDialectHandler
|
|
5
|
+
from src.sandwich.dialects.postgres import PostgresDialectHandler
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"DialectHandler",
|
|
9
|
+
"DialectHandlerFactory",
|
|
10
|
+
"MssqlDialectHandler",
|
|
11
|
+
"PostgresDialectHandler",
|
|
12
|
+
]
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Tuple
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import Table, TextClause
|
|
5
|
+
|
|
6
|
+
class DialectHandler(ABC):
    """Abstract interface for dialect-specific SQL/procedure code generation.

    Concrete subclasses (e.g. ``MssqlDialectHandler``, ``PostgresDialectHandler``)
    render the DDL/DML procedures for staging, hub, link, satellite, dimension,
    job and drop operations.

    Unless noted otherwise, every ``make_*_proc`` method returns a 3-tuple of
    ``(procedure_code, procedure_name, call_statement)``, where
    ``call_statement`` is the SQL used to invoke the generated procedure.
    """

    @abstractmethod
    def get_boolean_type(self):
        """Return the SQLAlchemy column type used for booleans in this dialect."""
        ...

    @abstractmethod
    def get_proc_name_format(self, schema: str, operation: str, entity_name: str) -> str:
        """Return the fully qualified, dialect-quoted procedure name.

        Args:
            schema: Schema the procedure lives in (e.g. ``elt``)
            operation: Operation prefix (e.g. ``Populate_hub``)
            entity_name: Entity/table name the procedure targets
        """
        pass

    @abstractmethod
    def apply_proc_template(self, proc_name: str, sql_body: str, header: str) -> str:
        """Wrap SQL body in procedure template with error handling and logging.

        Args:
            proc_name: Name of the procedure
            sql_body: The main SQL logic to execute
            header: Auto-generated header comment

        Returns:
            Complete procedure definition
        """
        pass

    @abstractmethod
    def make_stg_materialization_proc(
        self,
        entity_name: str,
        header: str
    ) -> Tuple[str, str, str]:
        """Generate staging table materialization procedure.

        Args:
            entity_name: Entity name
            header: Auto-generated header comment

        Returns:
            Tuple of (procedure_code, procedure_name, call_statement)
        """
        pass

    @abstractmethod
    def make_hub_proc(
        self,
        hub_table: Table,
        bk_keys: list,
        header: str
    ) -> Tuple[str, str, str]:
        """Generate hub population procedure.

        Args:
            hub_table: SQLAlchemy Table object for hub
            bk_keys: List of business key tuples (name, type)
            header: Auto-generated header comment

        Returns:
            Tuple of (procedure_code, procedure_name, call_statement)
        """
        pass

    @abstractmethod
    def make_link_proc(
        self,
        link_table: Table,
        hk_keys: list,
        header: str
    ) -> Tuple[str, str, str]:
        """Generate link population procedure.

        Args:
            link_table: SQLAlchemy Table object for link
            hk_keys: List of hash key tuples (name, type)
            header: Auto-generated header comment

        Returns:
            Tuple of (procedure_code, procedure_name, call_statement)
        """
        pass

    @abstractmethod
    def make_scd2_sat_proc(
        self,
        sat_table: Table,
        hk_name: str,
        hashdiff_col: str,
        is_available_col: str,
        loaddate_col: str,
        stg_schema: str,
        header: str
    ) -> Tuple[str, str, str]:
        """Generate SCD2 satellite population procedure.

        Args:
            sat_table: SQLAlchemy Table object for the satellite
            hk_name: Hash-key column name
            hashdiff_col: Hashdiff column name (change detection)
            is_available_col: Availability-flag column name
            loaddate_col: Load-date column name
            stg_schema: Staging schema feeding the satellite
            header: Auto-generated header comment

        Returns:
            Tuple of (procedure_code, procedure_name, call_statement)
        """
        pass

    @abstractmethod
    def make_scd0_sat_proc(self, sat_table: Table, header: str) -> Tuple[str, str, str]:
        """Generate SCD0 (insert-only) satellite population procedure.

        Returns:
            Tuple of (procedure_code, procedure_name, call_statement)
        """
        pass

    @abstractmethod
    def make_scd2_dim_proc(
        self,
        dim_table: Table,
        bk_keys: list,
        header: str
    ) -> Tuple[str, str, str]:
        """Generate dimension SCD2 recalculation procedure.

        Args:
            dim_table: SQLAlchemy Table object for dimension
            bk_keys: List of business key tuples (name, type)
            header: Auto-generated header comment

        Returns:
            Tuple of (procedure_code, procedure_name, call_statement)
        """
        pass

    @abstractmethod
    def make_job_proc(
        self,
        entity_name: str,
        proc_names: list[str],
        header: str
    ) -> Tuple[str, str, str]:
        """Generate main job orchestration procedure.

        Args:
            entity_name: Entity name
            proc_names: Names of the procedures to run, in order; ``None``
                entries are skipped by implementations
            header: Auto-generated header comment

        Returns:
            Tuple of (procedure_code, procedure_name, call_statement)
        """
        pass

    @abstractmethod
    def make_drop_proc(self, entity_name, table_schemas: list[str], procedures: list[str], header: str) \
            -> Tuple[str, str, str]:
        """Generate procedure dropping all tables/procedures of an entity.

        Returns:
            Tuple of (procedure_code, procedure_name, call_statement)
        """
        ...
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# language=sql
# Registry table: one row per entity managed by sandwich (soft-deletable).
create_entities_table = """
if object_id('core.entities') is null
begin
    create table core.entities (
        [entity_id] bigint primary key identity,
        [entity_name] varchar(100) not null,
        [template] varchar(50) not null,
        [created] datetime2(7) not null default sysdatetime(),
        [updated] datetime2(7) not null default sysdatetime(),
        [is_deleted] bit not null default 0,
        [deleted] datetime2(7) default null,
        unique ([entity_name])
    );
end
"""

# language=sql
# Run log written by core.LogExecution: one row per procedure execution,
# with optional link to a parent execution and to an ErrorLog row.
create_table_ExecutionLog = """
if object_id('core.ExecutionLog') is null
begin
    create table [core].[ExecutionLog](
        [executionID] [bigint] identity(1,1) primary key NOT NULL,
        [procid] [int] NOT NULL,
        [begin_timestamp] [datetime2](7) NOT NULL default (getdate()),
        [end_timestamp] [datetime2](7) NULL default (NULL),
        [errorID] [int] NULL,
        [procname] [varchar](200) NULL,
        [parent_executionID] [bigint] NULL
    )
end
"""

# language=sql
# Error details captured by the catch block of generated procedures.
create_table_ErrorLog = """
if object_id('core.ErrorLog') is null
begin
    create table [core].[ErrorLog](
        [ErrorID] [int] IDENTITY(1,1) NOT NULL,
        [UserName] [varchar](100) NULL,
        [ErrorNumber] [int] NULL,
        [ErrorState] [int] NULL,
        [ErrorSeverity] [int] NULL,
        [ErrorLine] [int] NULL,
        [ErrorProcedure] [varchar](max) NULL,
        [ErrorMessage] [varchar](max) NULL,
        [ErrorDateTime] [datetime] NULL
    );
end
"""

# language=sql
# SHA-1 hash helper (trimmed, upper-cased input; NULL treated as '').
# NOTE(review): unlike the Postgres variant (core.string_to_hash1), this
# version does not map '(unknown)'/'empty' to the all-zero hash -- confirm
# whether that cross-dialect difference is intentional.
create_func_StringToHash = """
create or alter function [core].[StringToHash1]
(
    @StrValue1 nvarchar(1000)
) returns char(40) as
begin
    declare @result char(40);
    set @result = upper(convert(char(40), hashbytes('sha1',
        upper(rtrim(ltrim(isnull(@StrValue1, ''))))
    ), 2));
    return @result;
end
"""

# language=sql
# Idempotent creation of every schema used by the generated objects.
create_schemas = """
if schema_id('core') is null
    exec ('create schema core')
if schema_id('stg') is null
    exec ('create schema stg')
if schema_id('hub') is null
    exec ('create schema hub')
if schema_id('sat') is null
    exec ('create schema sat')
if schema_id('link') is null
    exec ('create schema link')
if schema_id('dim') is null
    exec ('create schema dim')
if schema_id('fact') is null
    exec ('create schema fact')
if schema_id('elt') is null
    exec ('create schema elt')
if schema_id('job') is null
    exec ('create schema job')
if schema_id('meta') is null
    exec ('create schema meta')
if schema_id('proxy') is null
    exec ('create schema proxy')
"""

# language=sql
# Start/finish logger: called with @executionID_in = null to open an
# ExecutionLog row (returning its id via @executionID_out), and with the
# id again to stamp end_timestamp when the procedure finishes.
create_proc_LogExecution = """
create or alter proc [core].[LogExecution]
(
    @procid int,
    @executionID_in bigint,
    @executionID_out bigint out,
    @parent_executionID bigint = null
) as
begin
    set nocount on;

    if @executionID_in is not null
    begin
        update [core].[ExecutionLog]
        set [end_timestamp] = getdate()
        where executionID = @executionID_in;

        set @executionID_out = @executionID_in;
    end else
    begin

        declare @out table (executionID int);
        insert into [core].[ExecutionLog] (procid, procname, parent_executionID) output inserted.executionID
        into @out
        values (@procid, object_name(@procid), @parent_executionID);

        set @executionID_out = (select executionID from @out);
    end
end
"""
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# language=sql
# pgcrypto provides digest(), used by core.string_to_hash1 below.
create_extensions = """
create extension if not exists pgcrypto;
"""

# language=sql
# Registry table: one row per entity managed by sandwich (soft-deletable).
create_entities_table = """
CREATE TABLE IF NOT EXISTS core.entities (
    entity_id BIGSERIAL PRIMARY KEY,
    entity_name VARCHAR(100) NOT NULL,
    template VARCHAR(50) NOT NULL,
    created TIMESTAMP NOT NULL DEFAULT NOW(),
    updated TIMESTAMP NOT NULL DEFAULT NOW(),
    is_deleted BOOLEAN NOT NULL DEFAULT FALSE,
    deleted TIMESTAMP DEFAULT NULL,
    UNIQUE (entity_name)
);
"""

# language=sql
# SHA-1 hash helper; NULL, '(unknown)' and 'empty' map to the all-zero hash.
create_func_StringToHash = """
create or replace function core.string_to_hash1(str_value text)
returns char(40)
language plpgsql
as $$
declare
    result char(40);
begin
    if str_value is null
        or str_value in ('(unknown)', 'empty')
    then
        result := repeat('0', 40);
    else
        result :=
            upper(
                encode(
                    digest(upper(trim(str_value)), 'sha1'),
                    'hex'
                )
            );
    end if;

    return cast(result as char(40));
end;
$$;
"""

# language=sql
# Run log written by core.LogExecution: one row per procedure execution.
create_table_ExecutionLog = """
CREATE TABLE IF NOT EXISTS core.ExecutionLog (
    executionID BIGSERIAL PRIMARY KEY,
    procid INT NOT NULL,
    begin_timestamp TIMESTAMP NOT NULL DEFAULT NOW(),
    end_timestamp TIMESTAMP DEFAULT NULL,
    errorID INT DEFAULT NULL,
    procname VARCHAR(200) DEFAULT NULL,
    parent_executionID BIGINT DEFAULT NULL
);
"""

# language=sql
# Error details captured by the exception handler of generated procedures.
create_table_ErrorLog = """
CREATE TABLE IF NOT EXISTS core.ErrorLog (
    ErrorID SERIAL PRIMARY KEY,
    UserName VARCHAR(100) DEFAULT NULL,
    ErrorNumber INT DEFAULT NULL,
    ErrorState INT DEFAULT NULL,
    ErrorSeverity INT DEFAULT NULL,
    ErrorLine INT DEFAULT NULL,
    ErrorProcedure TEXT DEFAULT NULL,
    ErrorMessage TEXT DEFAULT NULL,
    ErrorDateTime TIMESTAMP DEFAULT NULL
);
"""

# language=sql
# Start/finish logger: open a new ExecutionLog row when p_executionID_in is
# NULL (returning its id), otherwise stamp end_timestamp on the given row.
# NOTE(review): procname is inserted as NULL here, while the MSSQL variant
# records object_name(@procid) -- confirm whether that is intentional.
create_proc_LogExecution = """
CREATE OR REPLACE PROCEDURE core.LogExecution(
    p_procid INT,
    p_executionID_in BIGINT,
    INOUT p_executionID_out BIGINT,
    p_parent_executionID BIGINT DEFAULT NULL
)
LANGUAGE plpgsql
AS $$
BEGIN
    IF p_executionID_in IS NOT NULL THEN
        UPDATE core.ExecutionLog
        SET end_timestamp = NOW()
        WHERE executionID = p_executionID_in;

        p_executionID_out := p_executionID_in;
    ELSE
        INSERT INTO core.ExecutionLog (procid, procname, parent_executionID)
        VALUES (p_procid, NULL, p_parent_executionID)
        RETURNING executionID INTO p_executionID_out;
    END IF;
END;
$$;
"""

# language=sql
# Idempotent creation of every schema used by the generated objects.
# Fixed: this was an f-string with no placeholders (would break if the SQL
# ever contained literal braces); also added the "link" schema for parity
# with the MSSQL dialect, which creates it.
create_schemas = """
CREATE SCHEMA IF NOT EXISTS core;
CREATE SCHEMA IF NOT EXISTS stg;
CREATE SCHEMA IF NOT EXISTS hub;
CREATE SCHEMA IF NOT EXISTS sat;
CREATE SCHEMA IF NOT EXISTS link;
CREATE SCHEMA IF NOT EXISTS dim;
CREATE SCHEMA IF NOT EXISTS fact;
CREATE SCHEMA IF NOT EXISTS elt;
CREATE SCHEMA IF NOT EXISTS job;
CREATE SCHEMA IF NOT EXISTS meta;
CREATE SCHEMA IF NOT EXISTS proxy;
"""
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Relative imports: the wheel installs this package as ``sandwich`` (see the
# dist-info RECORD), so the previous ``src.sandwich`` prefix would raise
# ImportError for installed users. Matches the style used in ``mssql.py``.
from .base import DialectHandler
from .mssql import MssqlDialectHandler
from .postgres import PostgresDialectHandler
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DialectHandlerFactory:
    """Registry-backed factory mapping dialect names to handler classes."""

    # Built-in dialects; extendable at runtime via register_dialect().
    _handlers = {
        "mssql": MssqlDialectHandler,
        "postgres": PostgresDialectHandler,
    }

    @classmethod
    def register_dialect(cls, dialect_name: str, handler_class):
        """Register (or replace) the handler class for *dialect_name*."""
        cls._handlers[dialect_name] = handler_class

    @classmethod
    def create_handler(cls, dialect: str) -> DialectHandler:
        """Instantiate and return the handler registered for *dialect*.

        Raises:
            ValueError: if no handler is registered under *dialect*.
        """
        if dialect in cls._handlers:
            return cls._handlers[dialect]()

        available = ", ".join(cls._handlers.keys())
        raise ValueError(f"Unknown dialect '{dialect}'. Available dialects: {available}")

    @classmethod
    def get_available_dialects(cls) -> list[str]:
        """Return the names of all registered dialects."""
        return list(cls._handlers.keys())
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"""MSSQL dialect handler for SQL code generation."""
|
|
2
|
+
from typing import Tuple
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import dialects, Table, text
|
|
5
|
+
|
|
6
|
+
#from sandwich import SANDWICH_VERSION
|
|
7
|
+
#from sandwich.modeling import modeling_metadata
|
|
8
|
+
#from sandwich.modeling.strategies.base import ValidationResult
|
|
9
|
+
|
|
10
|
+
from .base import DialectHandler
|
|
11
|
+
from .utils import get_columns_list
|
|
12
|
+
|
|
13
|
+
class MssqlDialectHandler(DialectHandler):
    """Dialect handler for Microsoft SQL Server.

    Every ``make_*_proc`` method returns a 3-tuple of
    ``(procedure_code, procedure_name, call_statement)``, where
    ``call_statement`` is the T-SQL that invokes the generated procedure.
    """

    def get_boolean_type(self):
        """Return the SQLAlchemy MSSQL BIT type used for boolean columns."""
        return dialects.mssql.BIT

    def get_proc_name_format(self, schema: str, operation: str, entity_name: str) -> str:
        """Get MSSQL procedure naming format: ``[schema].[operation_entity]``."""
        return f"[{schema}].[{operation}_{entity_name}]"

    def apply_proc_template(self, proc_name: str, sql_body: str, header: str) -> str:
        """Wrap SQL body in MSSQL procedure template with error handling.

        The template logs start/end via core.LogExecution, records failures
        in core.ErrorLog, links the error back to the ExecutionLog row and
        re-raises via RAISERROR so callers still see the failure.
        """
        # language=sql
        proc_template_sql = f"""
{header}
create or alter proc {proc_name} (@parent_executionID bigint = null) as
begin
    set nocount on;

    declare @executionID bigint;
    exec core.LogExecution @@PROCID, null, @executionID out, @parent_executionID;

    begin try
        {sql_body}
        exec core.LogExecution @@PROCID, @executionID, @executionID out;
    end try
    begin catch
        declare @err table (ErrorID int);
        declare @ErrorMessage NVARCHAR(4000);
        declare @ErrorSeverity INT;
        declare @ErrorState INT;

        set @ErrorMessage = ERROR_MESSAGE();
        set @ErrorSeverity = ERROR_SEVERITY();
        set @ErrorState = ERROR_STATE();

        -- Explicit column list (matches ddl_mssql.create_table_ErrorLog);
        -- the previous implicit insert depended on table column order.
        insert into core.ErrorLog
            (UserName, ErrorNumber, ErrorState, ErrorSeverity,
             ErrorLine, ErrorProcedure, ErrorMessage, ErrorDateTime)
        output inserted.ErrorID into @err
        values (
            SUSER_SNAME(),
            ERROR_NUMBER(),
            @ErrorState,
            @ErrorSeverity,
            ERROR_LINE(),
            ERROR_PROCEDURE(),
            @ErrorMessage,
            getdate()
        );

        update [core].[ExecutionLog]
        set [errorID] = (select ErrorID from @err)
          , [end_timestamp] = getdate()
        where [executionID] = @executionID;

        RAISERROR (
            @ErrorMessage,
            @ErrorSeverity,
            @ErrorState
        );
    end catch
end
"""
        return proc_template_sql

    def make_stg_materialization_proc(self, entity_name: str, header: str) -> Tuple[str, str, str]:
        """Generate a procedure that rebuilds stg.<entity> from proxy.<entity>."""
        proc_name = self.get_proc_name_format("elt", "Populate_stg", entity_name)

        # language=sql
        proc_body = f"""
        if object_id('stg.{entity_name}') is not null drop table stg.{entity_name};
        select *
        into stg.{entity_name}
        from proxy.{entity_name};
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_hub_proc(self, hub_table: Table, bk_keys: list, header: str) -> Tuple[str, str, str]:
        """Generate hub population procedure (insert-only, deduplicated on the
        business keys in *bk_keys*)."""
        proc_name = self.get_proc_name_format("elt", f"Populate_{hub_table.schema}", hub_table.name)
        where_fields_list_str = " and ".join([f"hub.[{bk[0]}] = stg.[{bk[0]}]" for bk in bk_keys])
        columns_list = get_columns_list(hub_table)

        # language=sql
        proc_body = f"""
        insert into [{hub_table.schema}].[{hub_table.name}]
            ({columns_list})
        select distinct {get_columns_list(hub_table, alias="stg")}
        from stg.[{hub_table.name}] as stg
        where not exists (
            select *
            from [{hub_table.schema}].[{hub_table.name}] as hub
            where {where_fields_list_str}
        );
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_link_proc(self, link_table: Table, hk_keys: list, header: str) -> Tuple[str, str, str]:
        """Generate link population procedure; new rows are matched on all
        hash keys except the link's own hk_<name> key."""
        proc_name = self.get_proc_name_format("elt", f"Populate_{link_table.schema}", link_table.name)
        where_fields_list_str = "\n\t\tand ".join(
            [f"link.[{hk[0]}] = stg.[{hk[0]}]" for hk in hk_keys if hk[0] != f"hk_{link_table.name}"]
        )
        columns_list = get_columns_list(link_table)

        # language=sql
        proc_body = f"""
        insert into [{link_table.schema}].[{link_table.name}]
            ({columns_list})
        select distinct {get_columns_list(link_table, alias="stg")}
        from stg.[{link_table.name}] as stg
        where not exists (
            select *
            from [{link_table.schema}].[{link_table.name}] as link
            where {where_fields_list_str}
        );
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_scd0_sat_proc(self, sat_table: Table, header: str) -> Tuple[str, str, str]:
        """Generate SCD0 (insert-only) satellite population procedure keyed on
        hk_<table name>."""
        proc_name = self.get_proc_name_format("elt", f"Populate_{sat_table.schema}", sat_table.name)
        columns_list = get_columns_list(sat_table)
        hk_name = f"hk_{sat_table.name}"

        # language=sql
        proc_body = f"""
        insert into [{sat_table.schema}].[{sat_table.name}]
            ({columns_list})
        select {get_columns_list(sat_table, alias="stg")}
        from stg.[{sat_table.name}] stg
        where not exists (
            select *
            from sat.[{sat_table.name}] sat
            where stg.[{hk_name}] = sat.[{hk_name}]
        )
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_scd2_sat_proc(
        self,
        sat_table: Table,
        hk_name: str,
        hashdiff_col: str,
        is_available_col: str,
        loaddate_col: str,
        stg_schema: str,
        header: str
    ) -> Tuple[str, str, str]:
        """Generate SCD2 satellite population procedure.

        Inserts new/changed versions from staging and closes out keys that
        disappeared from staging (re-inserting their latest version with a
        fresh load date and availability flag = 0).

        Args:
            sat_table: SQLAlchemy Table object for the satellite
            hk_name: Hash-key column name
            hashdiff_col: Hashdiff column used for change detection
            is_available_col: Availability-flag column name
            loaddate_col: Load-date column name
            stg_schema: "proxy" to read staging directly, otherwise the
                staging data is first materialized into a temp table
            header: Auto-generated header comment
        """
        proc_name = self.get_proc_name_format("elt", f"Populate_{sat_table.schema}", sat_table.name)
        columns_list = get_columns_list(sat_table)

        def smart_replace(column_name: str) -> str:
            # Close-out rows get a fresh load date and availability = 0.
            # Fixed: previously the literals "LoadDate"/"IsAvailable" were
            # hardcoded, silently ignoring the loaddate_col/is_available_col
            # parameters for non-default column names.
            if column_name == loaddate_col:
                return f"sysdatetime() as [{loaddate_col}]"
            if column_name == is_available_col:
                return f"cast(0 as bit) as [{is_available_col}]"
            return f"sat.[{column_name}]"

        select_columns_list = ", ".join([smart_replace(col.name) for col in sat_table.columns.values()])

        if stg_schema == "proxy":
            stg_table_name = f"stg.[{sat_table.name}]"
            materialization_stmt = ""
        else:
            # Snapshot staging once so both branches of the union see the
            # same data.
            stg_table_name = "#materialized"
            materialization_stmt = f"""
        select distinct {columns_list}
        into #materialized
        from stg.[{sat_table.name}];
        """

        # language=sql
        proc_body = f"""{materialization_stmt}
        with ranked_history as
        (
            select {columns_list}
                 , row_number() over (partition by [{hk_name}] order by [{loaddate_col}] desc) [DescRank]
            from [{sat_table.schema}].[{sat_table.name}]
        )
        insert into [{sat_table.schema}].[{sat_table.name}]
            ({columns_list})
        select {get_columns_list(sat_table, alias="stg")}
        from {stg_table_name} stg
        where not exists (
            select *
            from ranked_history sat
            where sat.[DescRank] = 1
                and stg.[{hk_name}] = sat.[{hk_name}]
                and stg.[{hashdiff_col}] = sat.[{hashdiff_col}]
                and sat.[{is_available_col}] = 1
        )

        union all

        select {select_columns_list}
        from ranked_history sat
        where not exists (
            select *
            from {stg_table_name} stg
            where stg.[{hk_name}] = sat.[{hk_name}]
        )
            and sat.[DescRank] = 1
            and sat.[{is_available_col}] = 1;
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_scd2_dim_proc(self, dim_table: Table, bk_keys: list, header: str) -> Tuple[str, str, str]:
        """Generate dimension SCD2 recalculation procedure: truncate and
        rebuild the dimension from its satellite, deriving DateFrom/DateTo/
        IsCurrent from sat.LoadDate windows partitioned by the business keys."""
        proc_name = self.get_proc_name_format("elt", f"Recalculate_{dim_table.schema}", dim_table.name)
        columns_list = get_columns_list(dim_table)
        # Fixed: was a lambda (E731) re-joining the keys on every call;
        # the partition key list is invariant, so compute it once.
        pk_keys = ", ".join(f"sat.[{bk[0]}]" for bk in bk_keys)

        def smart_replace(column_name: str) -> str:
            if column_name == "DateFrom":
                return "sat.LoadDate as [DateFrom]"
            if column_name == "DateTo":
                return f"lead(dateadd(microsecond, -1, sat.LoadDate), 1, '9999-12-31 23:59:59.9999999') over (partition by {pk_keys} order by sat.LoadDate) [DateTo]"
            if column_name == "IsCurrent":
                return f"iif(lead(sat.LoadDate) over (partition by {pk_keys} order by sat.LoadDate) is null, 1, 0) [IsCurrent]"
            return f"sat.[{column_name}]"

        select_columns_list = "\n\t, ".join([smart_replace(col.name) for col in dim_table.columns.values()])

        # language=sql
        proc_body = f"""
        truncate table [{dim_table.schema}].[{dim_table.name}];

        insert into [{dim_table.schema}].[{dim_table.name}]
            ({columns_list})
        select {select_columns_list}
        from sat.[{dim_table.name}] sat
        """
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_job_proc(self, entity_name: str, proc_names: list[str], header: str) -> Tuple[str, str, str]:
        """Generate the job procedure running all *proc_names* in order;
        ``None`` entries are skipped. Children receive the job's
        @executionID as their parent execution id."""
        proc_name = f"[job].[Run_all_related_to_{entity_name}]"
        proc_body = "\n\t" + "".join(
            f"exec {proc} @executionID;\n\t" for proc in proc_names if proc is not None
        )
        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"

    def make_drop_proc(self, entity_name, table_schemas: list[str], procedures: list[str], header: str) -> Tuple[str, str, str]:
        """Generate the teardown procedure: drop the entity's procedures
        (``None`` entries skipped) and tables, then soft-delete its row in
        core.entities."""
        proc_name = f"[meta].[Drop_all_related_to_{entity_name}]"
        proc_body = "\n\t" + "".join(
            f"drop procedure if exists {proc};\n\t" for proc in procedures if proc is not None
        )
        proc_body += "\n\t"
        proc_body += "".join(
            f"drop table if exists [{schema}].[{entity_name}];\n\t" for schema in table_schemas
        )
        proc_body += "\n\t"
        proc_body += \
            f"""update core.[entities]
        set [deleted] = sysdatetime()
          , [is_deleted] = 1
        where [entity_name] = '{entity_name}'
        """

        proc_code = self.apply_proc_template(proc_name, proc_body, header)
        return proc_code, proc_name, f"exec {proc_name}"