sandwich 0.1.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,13 @@
1
- # Python-generated files
2
- __pycache__/
3
- *.py[oc]
4
- build/
5
- dist/
6
- wheels/
7
- *.egg-info
8
-
9
- # Virtual environments
10
- .venv
11
-
12
- # config
13
- .env
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+
12
+ # config
13
+ .env
@@ -0,0 +1,164 @@
1
+ Metadata-Version: 2.4
2
+ Name: sandwich
3
+ Version: 0.2.1
4
+ Summary: DataVault 2.0 code gen
5
+ Author-email: Andrey Morozov <andrey@morozov.lv>
6
+ License-File: LICENSE
7
+ Keywords: DWH,Data Vault 2.0
8
+ Classifier: Development Status :: 1 - Planning
9
+ Classifier: Environment :: Console
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Typing :: Typed
15
+ Requires-Python: >=3.14
16
+ Requires-Dist: sqlalchemy
17
+ Description-Content-Type: text/markdown
18
+
19
+ ## Data Vault 2.0 scaffolding tool
20
+ This tool is designed to streamline the process of creating Data Vault 2.0 entities,
21
+ such as hubs, links, and satellites.
22
+ It also builds information layer objects such as dim and fact tables
23
+ from a multidimensional paradigm.
24
+
25
+ ### How it works:
26
+ User: provides a staging view `stg.[entity_name]` (or a table if the staging layer is persisted)
27
+ with all requirements for the `[entity_name]` defined in the schema (see below for how to define them).
28
+ Tool:
29
+ 1. Validates metadata of the provided staging view or table.
30
+ 2. Generates the necessary DDL statements to create the Data Vault 2.0 entities.
31
+ 3. Generates ELT procedures to load data to the generated entities.
32
+ 4. Generates support procedures such as `meta.Drop_all_related_to_[entity_name]` and `elt.Run_all_related_to_[entity_name]`
33
+
34
+ ```text
35
+ +----------------------+
36
+ | hub.[entity_name] |
37
+ +----------------------+
38
+ ^
39
+ o 1.define +-------------------+ | 3.create
40
+ /|\ -------> | stg.[entity_name] | # +----------------------+
41
+ / \ +-------------------+ /|\ ---------> | sat.[entity_name] |
42
+ User ---------------------------------------> / \ 3.create +----------------------+
43
+ 2.use Tool
44
+ | 3.create
45
+ v
46
+ +----------------------+
47
+ | dim.[entity_name] |
48
+ +----------------------+
49
+
50
+ ```
51
+
52
+ ### How to define a staging view or table:
53
+ * `bk_` (BusinessKey) - at least one `bk_` column
54
+ * `hk_[entity_name]` (HashKey) - exactly one `hk_[entity_name]` column if you want a `hub` table created
55
+ * `LoadDate` - required by the DV2 standard for auditability
56
+ * `RecordSource` - required by the DV2 standard for auditability
57
+ * `HashDiff` - optional, required if you want an SCD2-type `dim` table created
58
+ * `IsAvailable` - optional, required if you want to track missing/deleted records
59
+ * all other columns are treated as business columns and are included in the `sat` table definition
60
+
61
+
62
+ | staging fields | scd2dim profile |
63
+ |--------------------|-----------------|
64
+ | bk_ | ✅ |
65
+ | hk_`[entity_name]` | ✅ |
66
+ | LoadDate | ✅ |
67
+ | RecordSource | ✅ |
68
+ | HashDiff | ✅ |
69
+ | IsAvailable | ✅ |
70
+
71
+ ```sql
72
+ -- staging view example for the scd2dim profile (mssql)
73
+ create view [stg].[UR_officers] as
74
+ select cast(31 as bigint) [bk_id]
75
+ , core.StringToHash1(cast(31 as bigint)) [hk_UR_officers]
76
+ , sysdatetime() [LoadDate]
77
+ , cast('LobSystem.dbo.officers_daily' as varchar(200)) [RecordSource]
78
+ , core.StringToHash8(
79
+ cast('uri' as nvarchar(100))
80
+ , cast('00000000000000' as varchar(20))
81
+ , cast('NATURAL_PERSON' as varchar(50))
82
+ , cast(null as varchar(20))
83
+ , cast('INDIVIDUALLY' as varchar(50))
84
+ , cast(0 as int)
85
+ , cast('2008-04-07' as date)
86
+ , cast('2008-04-07 18:00:54.000' as datetime)
87
+ ) [HashDiff]
88
+ , cast('uri' as nvarchar(100)) [uri]
89
+ , cast('00000000000000' as varchar(20)) [at_legal_entity_registration_number]
90
+ , cast('NATURAL_PERSON' as varchar(50)) [entity_type]
91
+ , cast(null as varchar(20)) [legal_entity_registration_number]
92
+ , cast('INDIVIDUALLY' as varchar(50)) [rights_of_representation_type]
93
+ , cast(0 as int) [representation_with_at_least]
94
+ , cast('2008-04-07' as date) [registered_on]
95
+ , cast('2008-04-07 18:00:54.000' as datetime) [last_modified_at]
96
+ , cast(1 as bit) [IsAvailable]
97
+ ```
98
+ ### scd2dim profile example:
99
+ | stg | hub | sat | dim |
100
+ |--------------------|------------------------|----------------------------|--------------------|
101
+ | | | | hk_`[entity_name]` |
102
+ | BKs... | (uk)BKs... | BKs... | (pk)BKs... |
103
+ | hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
104
+ | LoadDate | LoadDate | (pk)LoadDate | |
105
+ | RecordSource | RecordSource | RecordSource | |
106
+ | HashDiff | | HashDiff | |
107
+ | FLDs... | | FLDs... | FLDs... |
108
+ | IsAvailable | | IsAvailable | IsAvailable |
109
+ | | | | IsCurrent |
110
+ | | | | (pk)DateFrom |
111
+ | | | | DateTo |
112
+
113
+ ### link2fact profile example:
114
+ | stg | link | sat | fact |
115
+ |--------------------|------------------------|----------------------------|------|
116
+ | HKs... | (uk)(fk)HKs... | | |
117
+ | hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
118
+ | degenerate_field | (uk)degenerate_field | degenerate_field | |
119
+ | LoadDate | LoadDate | LoadDate | |
120
+ | RecordSource | RecordSource | RecordSource | |
121
+ | FLDs... | | FLDs... | |
122
+
123
+
124
+ ### Schemas:
125
+ * `core` - framework-related code
126
+ * `stg` - staging layer for both virtual (views) and materialized (tables)
127
+ * `hub` - hub layer
128
+ * `sat` - satellite layer
129
+ * `dim` - dimension layer (information vault)
130
+ * `fact` - fact layer (information vault)
131
+ * `elt` - ELT procedures
132
+ * `job` - top level ELT procedures
133
+ * `meta` - metadata vault
134
+ * `proxy` - source data for materialized staging area (meant for wrapping external data sources as SQL views)
135
+
136
+ ### DV2-related schemas layering
137
+ | LoB* | staging | raw vault | business vault | information vault |
138
+ |-------|---------|-----------|----------------|-------------------|
139
+ | proxy | stg | hub | sal | dim |
140
+ | | | sat | | fact |
141
+ | | | link | | |
142
+ _* Line of Business applications_
143
+
144
+ ### Usage diagram
145
+ ```text
146
+ + +-----------+ automation
147
+ +---- + -------> | Dv2Utils | -------+------+
148
+ | + uses +-----------+ |
149
+ | + | uses | creates
150
+ | + v |
151
+ | + uses +-----------+ uses |
152
+ +---- + -------> | Dv2Helper | --------------+
153
+ | + +-----------+ |
154
+ o + | |
155
+ /|\ + | DDL | python
156
+ / \ ==========================================================
157
+ DWH Dev + creates | | database
158
+ | + v V
159
+ | + uses +--------+ uses +---------------+
160
+ +---- + -------> | entity | -----> | core objects |
161
+ + +--------+ +---------------+
162
+ +
163
+
164
+ ```
@@ -0,0 +1,146 @@
1
+ ## Data Vault 2.0 scaffolding tool
2
+ This tool is designed to streamline the process of creating Data Vault 2.0 entities,
3
+ such as hubs, links, and satellites.
4
+ It also builds information layer objects such as dim and fact tables
5
+ from a multidimensional paradigm.
6
+
7
+ ### How it works:
8
+ User: provides a staging view `stg.[entity_name]` (or a table if the staging layer is persisted)
9
+ with all requirements for the `[entity_name]` defined in the schema (see below for how to define them).
10
+ Tool:
11
+ 1. Validates metadata of the provided staging view or table.
12
+ 2. Generates the necessary DDL statements to create the Data Vault 2.0 entities.
13
+ 3. Generates ELT procedures to load data to the generated entities.
14
+ 4. Generates support procedures such as `meta.Drop_all_related_to_[entity_name]` and `elt.Run_all_related_to_[entity_name]`
15
+
16
+ ```text
17
+ +----------------------+
18
+ | hub.[entity_name] |
19
+ +----------------------+
20
+ ^
21
+ o 1.define +-------------------+ | 3.create
22
+ /|\ -------> | stg.[entity_name] | # +----------------------+
23
+ / \ +-------------------+ /|\ ---------> | sat.[entity_name] |
24
+ User ---------------------------------------> / \ 3.create +----------------------+
25
+ 2.use Tool
26
+ | 3.create
27
+ v
28
+ +----------------------+
29
+ | dim.[entity_name] |
30
+ +----------------------+
31
+
32
+ ```
33
+
34
+ ### How to define a staging view or table:
35
+ * `bk_` (BusinessKey) - at least one `bk_` column
36
+ * `hk_[entity_name]` (HashKey) - exactly one `hk_[entity_name]` column if you want a `hub` table created
37
+ * `LoadDate` - required by the DV2 standard for auditability
38
+ * `RecordSource` - required by the DV2 standard for auditability
39
+ * `HashDiff` - optional, required if you want an SCD2-type `dim` table created
40
+ * `IsAvailable` - optional, required if you want to track missing/deleted records
41
+ * all other columns are treated as business columns and are included in the `sat` table definition
42
+
43
+
44
+ | staging fields | scd2dim profile |
45
+ |--------------------|-----------------|
46
+ | bk_ | ✅ |
47
+ | hk_`[entity_name]` | ✅ |
48
+ | LoadDate | ✅ |
49
+ | RecordSource | ✅ |
50
+ | HashDiff | ✅ |
51
+ | IsAvailable | ✅ |
52
+
53
+ ```sql
54
+ -- staging view example for the scd2dim profile (mssql)
55
+ create view [stg].[UR_officers] as
56
+ select cast(31 as bigint) [bk_id]
57
+ , core.StringToHash1(cast(31 as bigint)) [hk_UR_officers]
58
+ , sysdatetime() [LoadDate]
59
+ , cast('LobSystem.dbo.officers_daily' as varchar(200)) [RecordSource]
60
+ , core.StringToHash8(
61
+ cast('uri' as nvarchar(100))
62
+ , cast('00000000000000' as varchar(20))
63
+ , cast('NATURAL_PERSON' as varchar(50))
64
+ , cast(null as varchar(20))
65
+ , cast('INDIVIDUALLY' as varchar(50))
66
+ , cast(0 as int)
67
+ , cast('2008-04-07' as date)
68
+ , cast('2008-04-07 18:00:54.000' as datetime)
69
+ ) [HashDiff]
70
+ , cast('uri' as nvarchar(100)) [uri]
71
+ , cast('00000000000000' as varchar(20)) [at_legal_entity_registration_number]
72
+ , cast('NATURAL_PERSON' as varchar(50)) [entity_type]
73
+ , cast(null as varchar(20)) [legal_entity_registration_number]
74
+ , cast('INDIVIDUALLY' as varchar(50)) [rights_of_representation_type]
75
+ , cast(0 as int) [representation_with_at_least]
76
+ , cast('2008-04-07' as date) [registered_on]
77
+ , cast('2008-04-07 18:00:54.000' as datetime) [last_modified_at]
78
+ , cast(1 as bit) [IsAvailable]
79
+ ```
80
+ ### scd2dim profile example:
81
+ | stg | hub | sat | dim |
82
+ |--------------------|------------------------|----------------------------|--------------------|
83
+ | | | | hk_`[entity_name]` |
84
+ | BKs... | (uk)BKs... | BKs... | (pk)BKs... |
85
+ | hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
86
+ | LoadDate | LoadDate | (pk)LoadDate | |
87
+ | RecordSource | RecordSource | RecordSource | |
88
+ | HashDiff | | HashDiff | |
89
+ | FLDs... | | FLDs... | FLDs... |
90
+ | IsAvailable | | IsAvailable | IsAvailable |
91
+ | | | | IsCurrent |
92
+ | | | | (pk)DateFrom |
93
+ | | | | DateTo |
94
+
95
+ ### link2fact profile example:
96
+ | stg | link | sat | fact |
97
+ |--------------------|------------------------|----------------------------|------|
98
+ | HKs... | (uk)(fk)HKs... | | |
99
+ | hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
100
+ | degenerate_field | (uk)degenerate_field | degenerate_field | |
101
+ | LoadDate | LoadDate | LoadDate | |
102
+ | RecordSource | RecordSource | RecordSource | |
103
+ | FLDs... | | FLDs... | |
104
+
105
+
106
+ ### Schemas:
107
+ * `core` - framework-related code
108
+ * `stg` - staging layer for both virtual (views) and materialized (tables)
109
+ * `hub` - hub layer
110
+ * `sat` - satellite layer
111
+ * `dim` - dimension layer (information vault)
112
+ * `fact` - fact layer (information vault)
113
+ * `elt` - ELT procedures
114
+ * `job` - top level ELT procedures
115
+ * `meta` - metadata vault
116
+ * `proxy` - source data for materialized staging area (meant for wrapping external data sources as SQL views)
117
+
118
+ ### DV2-related schemas layering
119
+ | LoB* | staging | raw vault | business vault | information vault |
120
+ |-------|---------|-----------|----------------|-------------------|
121
+ | proxy | stg | hub | sal | dim |
122
+ | | | sat | | fact |
123
+ | | | link | | |
124
+ _* Line of Business applications_
125
+
126
+ ### Usage diagram
127
+ ```text
128
+ + +-----------+ automation
129
+ +---- + -------> | Dv2Utils | -------+------+
130
+ | + uses +-----------+ |
131
+ | + | uses | creates
132
+ | + v |
133
+ | + uses +-----------+ uses |
134
+ +---- + -------> | Dv2Helper | --------------+
135
+ | + +-----------+ |
136
+ o + | |
137
+ /|\ + | DDL | python
138
+ / \ ==========================================================
139
+ DWH Dev + creates | | database
140
+ | + v V
141
+ | + uses +--------+ uses +---------------+
142
+ +---- + -------> | entity | -----> | core objects |
143
+ + +--------+ +---------------+
144
+ +
145
+
146
+ ```
@@ -1,38 +1,40 @@
1
- [project]
2
- name = "sandwich"
3
- version = "0.1.0"
4
- description = "DataVault 2.0 code gen"
5
- readme = "README.md"
6
- requires-python = ">=3.12"
7
- authors = [
8
- { name = "Andrey Morozov", email = "andrey@morozov.lv" }
9
- ]
10
- keywords = ["DWH", "Data Vault 2.0"]
11
- classifiers = [
12
- "Programming Language :: Python :: 3",
13
- "License :: OSI Approved :: MIT License",
14
- "Operating System :: OS Independent",
15
- "Environment :: Console",
16
- "Development Status :: 1 - Planning",
17
- "Intended Audience :: Developers",
18
- ]
19
- dependencies = [
20
- "pyodbc>=5.3.0",
21
- "sqlalchemy>=2.0.44",
22
- ]
23
-
24
- [dependency-groups]
25
- dev = [
26
- "dotenv>=0.9.9",
27
- "mypy>=1.19.0",
28
- "pytest>=9.0.1",
29
- ]
30
-
31
- [build-system]
32
- requires = ["hatchling"]
33
- build-backend = "hatchling.build"
34
-
35
- [tool.hatch.build]
36
- include = [
37
- "sandwich/py.typed",
38
- ]
1
+ [project]
2
+ name = "sandwich"
3
+ version = "0.2.1"
4
+ description = "DataVault 2.0 code gen"
5
+ readme = "README.md"
6
+ requires-python = ">=3.14"
7
+ authors = [
8
+ { name = "Andrey Morozov", email = "andrey@morozov.lv" }
9
+ ]
10
+ keywords = ["DWH", "Data Vault 2.0"]
11
+ classifiers = [
12
+ "Programming Language :: Python :: 3",
13
+ "License :: OSI Approved :: MIT License",
14
+ "Operating System :: OS Independent",
15
+ "Environment :: Console",
16
+ "Development Status :: 1 - Planning",
17
+ "Intended Audience :: Developers",
18
+ "Typing :: Typed",
19
+ ]
20
+ dependencies = [
21
+ "sqlalchemy",
22
+ ]
23
+
24
+ [dependency-groups]
25
+ dev = [
26
+ "mypy",
27
+ "psycopg2-binary",
28
+ "pyodbc",
29
+ "pytest",
30
+ "python-dotenv",
31
+ ]
32
+
33
+ [build-system]
34
+ requires = ["hatchling"]
35
+ build-backend = "hatchling.build"
36
+
37
+ [tool.hatch.build]
38
+ include = [
39
+ "sandwich/py.typed",
40
+ ]
sandwich-0.1.0/PKG-INFO DELETED
@@ -1,16 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: sandwich
3
- Version: 0.1.0
4
- Summary: DataVault 2.0 code gen
5
- Author-email: Andrey Morozov <andrey@morozov.lv>
6
- License-File: LICENSE
7
- Keywords: DWH,Data Vault 2.0
8
- Classifier: Development Status :: 1 - Planning
9
- Classifier: Environment :: Console
10
- Classifier: Intended Audience :: Developers
11
- Classifier: License :: OSI Approved :: MIT License
12
- Classifier: Operating System :: OS Independent
13
- Classifier: Programming Language :: Python :: 3
14
- Requires-Python: >=3.12
15
- Requires-Dist: pyodbc>=5.3.0
16
- Requires-Dist: sqlalchemy>=2.0.44
sandwich-0.1.0/README.md DELETED
File without changes
File without changes
File without changes