sandwich 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sandwich-0.2.1 → sandwich-0.3.0}/PKG-INFO +170 -155
- {sandwich-0.2.1 → sandwich-0.3.0}/README.md +43 -29
- {sandwich-0.2.1 → sandwich-0.3.0}/pyproject.toml +8 -4
- sandwich-0.3.0/src/sandwich/__init__.py +3 -0
- sandwich-0.3.0/src/sandwich/dialects/__init__.py +12 -0
- sandwich-0.3.0/src/sandwich/dialects/base.py +136 -0
- sandwich-0.3.0/src/sandwich/dialects/ddl_mssql.py +123 -0
- sandwich-0.3.0/src/sandwich/dialects/ddl_postgres.py +114 -0
- sandwich-0.3.0/src/sandwich/dialects/factory.py +27 -0
- sandwich-0.3.0/src/sandwich/dialects/mssql.py +281 -0
- sandwich-0.3.0/src/sandwich/dialects/postgres.py +107 -0
- sandwich-0.3.0/src/sandwich/dialects/utils.py +147 -0
- sandwich-0.3.0/src/sandwich/dwh/__init__.py +82 -0
- sandwich-0.3.0/src/sandwich/errors.py +25 -0
- sandwich-0.3.0/src/sandwich/main.py +0 -0
- sandwich-0.3.0/src/sandwich/modeling/__init__.py +120 -0
- sandwich-0.3.0/src/sandwich/modeling/strategies/__init__.py +15 -0
- sandwich-0.3.0/src/sandwich/modeling/strategies/base.py +94 -0
- sandwich-0.3.0/src/sandwich/modeling/strategies/factory.py +39 -0
- sandwich-0.3.0/src/sandwich/modeling/strategies/link2fact.py +225 -0
- sandwich-0.3.0/src/sandwich/modeling/strategies/scd2dim.py +228 -0
- sandwich-0.2.1/.gitignore +0 -13
- sandwich-0.2.1/LICENSE +0 -9
- {sandwich-0.2.1 → sandwich-0.3.0/src}/sandwich/py.typed +0 -0
|
@@ -1,164 +1,179 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: sandwich
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: DataVault 2.0 code gen
|
|
5
|
-
Author-email: Andrey Morozov <andrey@morozov.lv>
|
|
6
|
-
License-File: LICENSE
|
|
7
5
|
Keywords: DWH,Data Vault 2.0
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
Classifier:
|
|
6
|
+
Author: Andrey Morozov
|
|
7
|
+
Author-email: Andrey Morozov <andrey@morozov.lv>
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
9
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Topic :: Database
|
|
12
11
|
Classifier: Operating System :: OS Independent
|
|
13
|
-
Classifier:
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Development Status :: 2 - Pre-Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
14
15
|
Classifier: Typing :: Typed
|
|
15
|
-
Requires-Python: >=3.14
|
|
16
16
|
Requires-Dist: sqlalchemy
|
|
17
|
+
Requires-Python: >=3.14
|
|
17
18
|
Description-Content-Type: text/markdown
|
|
18
19
|
|
|
19
|
-
## Data Vault 2.0 scaffolding tool
|
|
20
|
-
This tool is designed to streamline the process of creating Data Vault 2.0 entities,
|
|
21
|
-
such as hubs, links, and satellites.
|
|
22
|
-
As well as building information layer objects such as dim and fact tables
|
|
23
|
-
from a multidimensional paradigm.
|
|
24
|
-
|
|
25
|
-
### How it works:
|
|
26
|
-
User: provides a staging view `stg.[entity_name]` (or a table if the staging layer persisted)
|
|
27
|
-
with all requirements for the `[entity_name]` defined in the schema (how to define see below).
|
|
28
|
-
Tool:
|
|
29
|
-
1. Validates metadata of the provided staging view or table.
|
|
30
|
-
2. Generates the necessary DDL statements to create the Data Vault 2.0 entities.
|
|
31
|
-
3. Generates ELT procedures to load data to the generated entities.
|
|
32
|
-
4. Generates support procedures such as `meta.Drop_all_related_to_[entity_name]` and `elt.Run_all_related_to_[entity_name]`
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
) [
|
|
88
|
-
,
|
|
89
|
-
, cast('
|
|
90
|
-
,
|
|
91
|
-
|
|
92
|
-
, cast('
|
|
93
|
-
, cast(
|
|
94
|
-
, cast(
|
|
95
|
-
, cast('
|
|
96
|
-
, cast(
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
| | |
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
|
115
|
-
|
|
116
|
-
|
|
|
117
|
-
|
|
|
118
|
-
|
|
|
119
|
-
|
|
|
120
|
-
|
|
|
121
|
-
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
###
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
20
|
+
## Data Vault 2.0 scaffolding tool
|
|
21
|
+
This tool is designed to streamline the process of creating Data Vault 2.0 entities,
|
|
22
|
+
such as hubs, links, and satellites.
|
|
23
|
+
It also builds information layer objects such as dim and fact tables
|
|
24
|
+
from a multidimensional paradigm.
|
|
25
|
+
|
|
26
|
+
### How it works:
|
|
27
|
+
User: provides a staging view `stg.[entity_name]` (or a table if the staging layer is persisted)
|
|
28
|
+
with all requirements for the `[entity_name]` defined in the schema (see below for how to define it).
|
|
29
|
+
Tool:
|
|
30
|
+
1. Validates metadata of the provided staging view or table.
|
|
31
|
+
2. Generates the necessary DDL statements to create the Data Vault 2.0 entities.
|
|
32
|
+
3. Generates ELT procedures to load data to the generated entities.
|
|
33
|
+
4. Generates support procedures such as `meta.Drop_all_related_to_[entity_name]` and `elt.Run_all_related_to_[entity_name]`
|
|
34
|
+
|
|
35
|
+
#### App design (layers):
|
|
36
|
+
DV2Modeler (service)
|
|
37
|
+
1. gets user input (stg) and analyzes it, producing `stg_info`
|
|
38
|
+
2. chooses strategy (`scd2dim`, `link2fact`)
|
|
39
|
+
|
|
40
|
+
Strategy (algorithm)
|
|
41
|
+
1. validates staging using `stg_info`
|
|
42
|
+
2. generates schema using dialects handler
|
|
43
|
+
|
|
44
|
+
Dialect handler (repository)
|
|
45
|
+
1. creates DB objects for postgres or MSSQL database
|
|
46
|
+
|
|
47
|
+
```text
|
|
48
|
+
+----------------------+
|
|
49
|
+
| hub.[entity_name] |
|
|
50
|
+
+----------------------+
|
|
51
|
+
^
|
|
52
|
+
o 1.define +-------------------+ | 3.create
|
|
53
|
+
/|\ -------> | stg.[entity_name] | # +----------------------+
|
|
54
|
+
/ \ +-------------------+ /|\ ---------> | sat.[entity_name] |
|
|
55
|
+
User ---------------------------------------> / \ 3.create +----------------------+
|
|
56
|
+
2.use Tool
|
|
57
|
+
| 3.create
|
|
58
|
+
v
|
|
59
|
+
+----------------------+
|
|
60
|
+
| dim.[entity_name] |
|
|
61
|
+
+----------------------+
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
### How to define a staging view or table:
|
|
66
|
+
* `bk_` (BusinessKey) - at least one `bk_` column
|
|
67
|
+
* `hk_[entity_name]` (HashKey) - exactly one `hk_[entity_name]` column if you want a `hub` table created
|
|
68
|
+
* `LoadDate` - required by the DV2 standard for auditability
|
|
69
|
+
* `RecordSource` - required by the DV2 standard for auditability
|
|
70
|
+
* `HashDiff` - optional; required only if you want an SCD2-type `dim` table created
|
|
71
|
+
* `IsAvailable` - optional; required only if you want to track missing/deleted records
|
|
72
|
+
* all other columns are treated as business columns and are included in the `sat` table definition
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
| staging fields | scd2dim profile | link2fact profile |
|
|
76
|
+
|--------------------|-----------------|-------------------|
|
|
77
|
+
| bk_ | ✅ | |
|
|
78
|
+
| hk_`[entity_name]` | ✅ | |
|
|
79
|
+
| LoadDate | ✅ | |
|
|
80
|
+
| RecordSource | ✅ | |
|
|
81
|
+
| HashDiff | ✅ | |
|
|
82
|
+
| IsAvailable | ✅ | |
|
|
83
|
+
|
|
84
|
+
```sql
|
|
85
|
+
-- staging view example for the scd2dim profile (mssql)
|
|
86
|
+
create view [stg].[UR_officers] as
|
|
87
|
+
select cast(31 as bigint) [bk_id]
|
|
88
|
+
, core.StringToHash1(cast(31 as bigint)) [hk_UR_officers]
|
|
89
|
+
, sysdatetime() [LoadDate]
|
|
90
|
+
, cast('LobSystem.dbo.officers_daily' as varchar(200)) [RecordSource]
|
|
91
|
+
, core.StringToHash8(
|
|
92
|
+
cast('uri' as nvarchar(100))
|
|
93
|
+
, cast('00000000000000' as varchar(20))
|
|
94
|
+
, cast('NATURAL_PERSON' as varchar(50))
|
|
95
|
+
, cast(null as varchar(20))
|
|
96
|
+
, cast('INDIVIDUALLY' as varchar(50))
|
|
97
|
+
, cast(0 as int)
|
|
98
|
+
, cast('2008-04-07' as date)
|
|
99
|
+
, cast('2008-04-07 18:00:54.000' as datetime)
|
|
100
|
+
) [HashDiff]
|
|
101
|
+
, cast('uri' as nvarchar(100)) [uri]
|
|
102
|
+
, cast('00000000000000' as varchar(20)) [at_legal_entity_registration_number]
|
|
103
|
+
, cast('NATURAL_PERSON' as varchar(50)) [entity_type]
|
|
104
|
+
, cast(null as varchar(20)) [legal_entity_registration_number]
|
|
105
|
+
, cast('INDIVIDUALLY' as varchar(50)) [rights_of_representation_type]
|
|
106
|
+
, cast(0 as int) [representation_with_at_least]
|
|
107
|
+
, cast('2008-04-07' as date) [registered_on]
|
|
108
|
+
, cast('2008-04-07 18:00:54.000' as datetime) [last_modified_at]
|
|
109
|
+
, cast(1 as bit) [IsAvailable]
|
|
110
|
+
```
|
|
111
|
+
### scd2dim profile columns mapping:
|
|
112
|
+
| stg | hub | sat | dim |
|
|
113
|
+
|--------------------|------------------------|----------------------------|--------------------|
|
|
114
|
+
| | | | hk_`[entity_name]` |
|
|
115
|
+
| BKs... | (uk)BKs... | BKs... | (pk)BKs... |
|
|
116
|
+
| hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
|
|
117
|
+
| LoadDate | LoadDate | (pk)LoadDate | |
|
|
118
|
+
| RecordSource | RecordSource | RecordSource | |
|
|
119
|
+
| HashDiff | | HashDiff | |
|
|
120
|
+
| FLDs... | | FLDs... | FLDs... |
|
|
121
|
+
| IsAvailable | | IsAvailable | IsAvailable |
|
|
122
|
+
| | | | IsCurrent |
|
|
123
|
+
| | | | (pk)DateFrom |
|
|
124
|
+
| | | | DateTo |
|
|
125
|
+
|
|
126
|
+
### link2fact profile columns mapping:
|
|
127
|
+
| stg | link | sat | fact |
|
|
128
|
+
|--------------------|--------------------------------|----------------------------|------|
|
|
129
|
+
| HKs... | (uk)(fk)hk_`other_entity_name` | | |
|
|
130
|
+
| hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
|
|
131
|
+
| <degenerate_field> | (uk)<degenerate_field> | <degenerate_field> | |
|
|
132
|
+
| LoadDate | LoadDate | LoadDate | |
|
|
133
|
+
| RecordSource | RecordSource | RecordSource | |
|
|
134
|
+
| FLDs... | | FLDs... | |
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
### Schemas:
|
|
138
|
+
* `core` - framework-related code
|
|
139
|
+
* `stg` - staging layer for both virtual (views) and materialized (tables)
|
|
140
|
+
* `hub` - hub tables
|
|
141
|
+
* `sat` - satellite tables
|
|
142
|
+
* `dim` - dimension tables (information vault)
|
|
143
|
+
* `fact` - fact tables (information vault)
|
|
144
|
+
* `elt` - ELT procedures
|
|
145
|
+
* `job` - top level ELT procedures
|
|
146
|
+
* `meta` - metadata vault
|
|
147
|
+
* `proxy` - source data for a materialized staging area (meant for wrapping external data sources as SQL views)
|
|
148
|
+
|
|
149
|
+
### DV2-related schemas layering
|
|
150
|
+
data -> ELT -> report
|
|
151
|
+
|
|
152
|
+
| LoB* data | staging (E) | raw vault (L) | business vault (T) | information vault |
|
|
153
|
+
|-----------|-------------|---------------|--------------------|-------------------|
|
|
154
|
+
| | stg | hub | sal | dim |
|
|
155
|
+
| | proxy | sat | | fact |
|
|
156
|
+
| | pool | link | | |
|
|
157
|
+
_* Line of Business applications_
|
|
158
|
+
|
|
159
|
+
### Usage diagram
|
|
160
|
+
```text
|
|
161
|
+
+ +-----------+ automation
|
|
162
|
+
+---- + -------> | Dv2Utils | -------+------+
|
|
163
|
+
| + uses +-----------+ |
|
|
164
|
+
| + | uses | creates
|
|
165
|
+
| + v |
|
|
166
|
+
| + uses +-----------+ uses |
|
|
167
|
+
+---- + -------> | Dv2Helper | --------------+
|
|
168
|
+
| + +-----------+ |
|
|
169
|
+
o + | |
|
|
170
|
+
/|\ + | DDL | python
|
|
171
|
+
/ \ ==========================================================
|
|
172
|
+
DWH Dev + creates | | database
|
|
173
|
+
| + v V
|
|
174
|
+
| + uses +--------+ uses +---------------+
|
|
175
|
+
+---- + -------> | entity | -----> | core objects |
|
|
176
|
+
+ +--------+ +---------------+
|
|
177
|
+
+
|
|
178
|
+
|
|
179
|
+
```
|
|
@@ -13,6 +13,18 @@ Tool:
|
|
|
13
13
|
3. Generates ELT procedures to load data to the generated entities.
|
|
14
14
|
4. Generates support procedures such as `meta.Drop_all_related_to_[entity_name]` and `elt.Run_all_related_to_[entity_name]`
|
|
15
15
|
|
|
16
|
+
#### App design (layers):
|
|
17
|
+
DV2Modeler (service)
|
|
18
|
+
1. gets user input (stg) and analyzes it, producing `stg_info`
|
|
19
|
+
2. chooses strategy (`scd2dim`, `link2fact`)
|
|
20
|
+
|
|
21
|
+
Strategy (algorithm)
|
|
22
|
+
1. validates staging using `stg_info`
|
|
23
|
+
2. generates schema using dialects handler
|
|
24
|
+
|
|
25
|
+
Dialect handler (repository)
|
|
26
|
+
1. creates DB objects for postgres or MSSQL database
|
|
27
|
+
|
|
16
28
|
```text
|
|
17
29
|
+----------------------+
|
|
18
30
|
| hub.[entity_name] |
|
|
@@ -41,14 +53,14 @@ Tool:
|
|
|
41
53
|
* all other columns are treated as business columns and are included in the `sat` table definition
|
|
42
54
|
|
|
43
55
|
|
|
44
|
-
| staging fields | scd2dim profile |
|
|
45
|
-
|
|
46
|
-
| bk_ | ✅ |
|
|
47
|
-
| hk_`[entity_name]` | ✅ |
|
|
48
|
-
| LoadDate | ✅ |
|
|
49
|
-
| RecordSource | ✅ |
|
|
50
|
-
| HashDiff | ✅ |
|
|
51
|
-
| IsAvailable | ✅ |
|
|
56
|
+
| staging fields | scd2dim profile | link2fact profile |
|
|
57
|
+
|--------------------|-----------------|-------------------|
|
|
58
|
+
| bk_ | ✅ | |
|
|
59
|
+
| hk_`[entity_name]` | ✅ | |
|
|
60
|
+
| LoadDate | ✅ | |
|
|
61
|
+
| RecordSource | ✅ | |
|
|
62
|
+
| HashDiff | ✅ | |
|
|
63
|
+
| IsAvailable | ✅ | |
|
|
52
64
|
|
|
53
65
|
```sql
|
|
54
66
|
-- staging view example for the scd2dim profile (mssql)
|
|
@@ -77,7 +89,7 @@ select cast(31 as bigint) [bk_id]
|
|
|
77
89
|
, cast('2008-04-07 18:00:54.000' as datetime) [last_modified_at]
|
|
78
90
|
, cast(1 as bit) [IsAvailable]
|
|
79
91
|
```
|
|
80
|
-
### scd2dim profile
|
|
92
|
+
### scd2dim profile columns mapping:
|
|
81
93
|
| stg | hub | sat | dim |
|
|
82
94
|
|--------------------|------------------------|----------------------------|--------------------|
|
|
83
95
|
| | | | hk_`[entity_name]` |
|
|
@@ -92,35 +104,37 @@ select cast(31 as bigint) [bk_id]
|
|
|
92
104
|
| | | | (pk)DateFrom |
|
|
93
105
|
| | | | DateTo |
|
|
94
106
|
|
|
95
|
-
### link2fact profile
|
|
96
|
-
| stg | link
|
|
97
|
-
|
|
98
|
-
| HKs... | (uk)(fk)
|
|
99
|
-
| hk_`[entity_name]` | (pk)hk_`[entity_name]`
|
|
100
|
-
| degenerate_field
|
|
101
|
-
| LoadDate | LoadDate
|
|
102
|
-
| RecordSource | RecordSource
|
|
103
|
-
| FLDs... |
|
|
107
|
+
### link2fact profile columns mapping:
|
|
108
|
+
| stg | link | sat | fact |
|
|
109
|
+
|--------------------|--------------------------------|----------------------------|------|
|
|
110
|
+
| HKs... | (uk)(fk)hk_`other_entity_name` | | |
|
|
111
|
+
| hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
|
|
112
|
+
| <degenerate_field> | (uk)<degenerate_field> | <degenerate_field> | |
|
|
113
|
+
| LoadDate | LoadDate | LoadDate | |
|
|
114
|
+
| RecordSource | RecordSource | RecordSource | |
|
|
115
|
+
| FLDs... | | FLDs... | |
|
|
104
116
|
|
|
105
117
|
|
|
106
118
|
### Schemas:
|
|
107
119
|
* `core` - framework-related code
|
|
108
120
|
* `stg` - staging layer for both virtual (views) and materialized (tables)
|
|
109
|
-
* `hub` - hub
|
|
110
|
-
* `sat` - satellite
|
|
111
|
-
* `dim` - dimension
|
|
112
|
-
* `fact` - fact
|
|
121
|
+
* `hub` - hub tables
|
|
122
|
+
* `sat` - satellite tables
|
|
123
|
+
* `dim` - dimension tables (information vault)
|
|
124
|
+
* `fact` - fact tables (information vault)
|
|
113
125
|
* `elt` - ELT procedures
|
|
114
126
|
* `job` - top level ELT procedures
|
|
115
127
|
* `meta` - metadata vault
|
|
116
|
-
* `proxy` - source data for materialized staging area (meant for wrapping external data sources as SQL views)
|
|
128
|
+
* `proxy` - source data for a materialized staging area (meant for wrapping external data sources as SQL views)
|
|
117
129
|
|
|
118
130
|
### DV2-related schemas layering
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
|
122
|
-
|
|
123
|
-
|
|
|
131
|
+
data -> ELT -> report
|
|
132
|
+
|
|
133
|
+
| LoB* data | staging (E) | raw vault (L) | business vault (T) | information vault |
|
|
134
|
+
|-----------|-------------|---------------|--------------------|-------------------|
|
|
135
|
+
| | stg | hub | sal | dim |
|
|
136
|
+
| | proxy | sat | | fact |
|
|
137
|
+
| | pool | link | | |
|
|
124
138
|
_* Line of Business applications_
|
|
125
139
|
|
|
126
140
|
### Usage diagram
|
|
@@ -143,4 +157,4 @@ _* Line of Business applications_
|
|
|
143
157
|
+ +--------+ +---------------+
|
|
144
158
|
+
|
|
145
159
|
|
|
146
|
-
```
|
|
160
|
+
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sandwich"
|
|
3
|
-
version = "0.
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
description = "DataVault 2.0 code gen"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.14"
|
|
@@ -11,9 +11,10 @@ keywords = ["DWH", "Data Vault 2.0"]
|
|
|
11
11
|
classifiers = [
|
|
12
12
|
"Programming Language :: Python :: 3",
|
|
13
13
|
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Topic :: Database",
|
|
14
15
|
"Operating System :: OS Independent",
|
|
15
16
|
"Environment :: Console",
|
|
16
|
-
"Development Status ::
|
|
17
|
+
"Development Status :: 2 - Pre-Alpha",
|
|
17
18
|
"Intended Audience :: Developers",
|
|
18
19
|
"Typing :: Typed",
|
|
19
20
|
]
|
|
@@ -30,9 +31,12 @@ dev = [
|
|
|
30
31
|
"python-dotenv",
|
|
31
32
|
]
|
|
32
33
|
|
|
34
|
+
[project.scripts]
|
|
35
|
+
sandwich = "sandwich:main"
|
|
36
|
+
|
|
33
37
|
[build-system]
|
|
34
|
-
requires = ["
|
|
35
|
-
build-backend = "
|
|
38
|
+
requires = ["uv_build>=0.9.11,<0.10.0"]
|
|
39
|
+
build-backend = "uv_build"
|
|
36
40
|
|
|
37
41
|
[tool.hatch.build]
|
|
38
42
|
include = [
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Dialects package for SQL code generation."""
|
|
2
|
+
from src.sandwich.dialects.base import DialectHandler
|
|
3
|
+
from src.sandwich.dialects.factory import DialectHandlerFactory
|
|
4
|
+
from src.sandwich.dialects.mssql import MssqlDialectHandler
|
|
5
|
+
from src.sandwich.dialects.postgres import PostgresDialectHandler
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"DialectHandler",
|
|
9
|
+
"DialectHandlerFactory",
|
|
10
|
+
"MssqlDialectHandler",
|
|
11
|
+
"PostgresDialectHandler",
|
|
12
|
+
]
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Tuple
|
|
3
|
+
|
|
4
|
+
from sqlalchemy import Table, TextClause
|
|
5
|
+
|
|
6
|
+
class DialectHandler(ABC):
|
|
7
|
+
@abstractmethod
|
|
8
|
+
def get_boolean_type(self): ...
|
|
9
|
+
|
|
10
|
+
@abstractmethod
|
|
11
|
+
def get_proc_name_format(self, schema: str, operation: str, entity_name: str) -> str:
|
|
12
|
+
pass
|
|
13
|
+
|
|
14
|
+
@abstractmethod
|
|
15
|
+
def apply_proc_template(self, proc_name: str, sql_body: str, header: str) -> str:
|
|
16
|
+
"""Wrap SQL body in procedure template with error handling and logging.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
proc_name: Name of the procedure
|
|
20
|
+
sql_body: The main SQL logic to execute
|
|
21
|
+
header: Auto-generated header comment
|
|
22
|
+
|
|
23
|
+
Returns:
|
|
24
|
+
Complete procedure definition
|
|
25
|
+
"""
|
|
26
|
+
pass
|
|
27
|
+
|
|
28
|
+
@abstractmethod
|
|
29
|
+
def make_stg_materialization_proc(
|
|
30
|
+
self,
|
|
31
|
+
entity_name: str,
|
|
32
|
+
header: str
|
|
33
|
+
) -> Tuple[str, str, str]:
|
|
34
|
+
"""Generate staging table materialization procedure.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
entity_name: Entity name
|
|
38
|
+
columns_list: Comma-separated list of columns
|
|
39
|
+
header: Auto-generated header comment
|
|
40
|
+
|
|
41
|
+
Returns:
|
|
42
|
+
Tuple of (procedure_code, procedure_name)
|
|
43
|
+
"""
|
|
44
|
+
pass
|
|
45
|
+
|
|
46
|
+
@abstractmethod
|
|
47
|
+
def make_hub_proc(
|
|
48
|
+
self,
|
|
49
|
+
hub_table: Table,
|
|
50
|
+
bk_keys: list,
|
|
51
|
+
header: str
|
|
52
|
+
) -> Tuple[str, str, str]:
|
|
53
|
+
"""Generate hub population procedure.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
hub_table: SQLAlchemy Table object for hub
|
|
57
|
+
bk_keys: List of business key tuples (name, type)
|
|
58
|
+
columns_list: Comma-separated list of columns
|
|
59
|
+
header: Auto-generated header comment
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Tuple of (procedure_code, procedure_name)
|
|
63
|
+
"""
|
|
64
|
+
pass
|
|
65
|
+
|
|
66
|
+
@abstractmethod
|
|
67
|
+
def make_link_proc(
|
|
68
|
+
self,
|
|
69
|
+
link_table: Table,
|
|
70
|
+
hk_keys: list,
|
|
71
|
+
header: str
|
|
72
|
+
) -> Tuple[str, str, str]:
|
|
73
|
+
pass
|
|
74
|
+
|
|
75
|
+
@abstractmethod
|
|
76
|
+
def make_scd2_sat_proc(
|
|
77
|
+
self,
|
|
78
|
+
sat_table: Table,
|
|
79
|
+
hk_name: str,
|
|
80
|
+
hashdiff_col: str,
|
|
81
|
+
is_available_col: str,
|
|
82
|
+
loaddate_col: str,
|
|
83
|
+
stg_schema: str,
|
|
84
|
+
header: str
|
|
85
|
+
) -> Tuple[str, str, str]:
|
|
86
|
+
pass
|
|
87
|
+
|
|
88
|
+
@abstractmethod
|
|
89
|
+
def make_scd0_sat_proc(self, sat_table: Table, header: str) -> Tuple[str, str, str]:
|
|
90
|
+
pass
|
|
91
|
+
|
|
92
|
+
@abstractmethod
|
|
93
|
+
def make_scd2_dim_proc(
|
|
94
|
+
self,
|
|
95
|
+
dim_table: Table,
|
|
96
|
+
bk_keys: list,
|
|
97
|
+
header: str
|
|
98
|
+
) -> Tuple[str, str, str]:
|
|
99
|
+
"""Generate dimension SCD2 recalculation procedure.
|
|
100
|
+
|
|
101
|
+
Args:
|
|
102
|
+
dim_table: SQLAlchemy Table object for dimension
|
|
103
|
+
bk_keys: List of business key tuples (name, type)
|
|
104
|
+
columns_list: Comma-separated list of columns
|
|
105
|
+
header: Auto-generated header comment
|
|
106
|
+
|
|
107
|
+
Returns:
|
|
108
|
+
Tuple of (procedure_code, procedure_name)
|
|
109
|
+
"""
|
|
110
|
+
pass
|
|
111
|
+
|
|
112
|
+
@abstractmethod
|
|
113
|
+
def make_job_proc(
|
|
114
|
+
self,
|
|
115
|
+
entity_name: str,
|
|
116
|
+
proc_names: list[str],
|
|
117
|
+
header: str
|
|
118
|
+
) -> Tuple[str, str, str]:
|
|
119
|
+
"""Generate main job orchestration procedure.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
entity_name: Entity name
|
|
123
|
+
hub_proc_name: Name of hub population procedure
|
|
124
|
+
sat_proc_name: Name of satellite population procedure
|
|
125
|
+
dim_proc_name: Name of dimension recalculation procedure
|
|
126
|
+
stg_proc_name: Name of staging materialization procedure (optional)
|
|
127
|
+
header: Auto-generated header comment
|
|
128
|
+
|
|
129
|
+
Returns:
|
|
130
|
+
Tuple of (procedure_code, procedure_name)
|
|
131
|
+
"""
|
|
132
|
+
pass
|
|
133
|
+
|
|
134
|
+
@abstractmethod
|
|
135
|
+
def make_drop_proc(self, entity_name, table_schemas: list[str], procedures: list[str], header: str) \
|
|
136
|
+
-> Tuple[str, str, str]: ...
|