sandwich 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ """Link to Fact strategy implementations."""
2
+ from datetime import datetime
3
+ from typing import Tuple
4
+
5
+ from sqlalchemy import Column, MetaData, Table, UniqueConstraint
6
+
7
+ from sandwich import SANDWICH_VERSION
8
+ from sandwich.dialects.base import DialectHandler
9
+ from sandwich.modeling import modeling_metadata, StgInfo, Dv2SystemInfo
10
+
11
+ from .base import BaseValidator, SchemaGenerator, ValidationResult
12
+
13
+
14
class Link2FactValidator(BaseValidator):
    """Staging validator for the `link2fact` template.

    The constructor registers `_validate_staging` as the template-specific
    hook (`_on_validate_staging`) on the base validator.
    """

    def __init__(self, template: str):
        super().__init__(template)
        self._on_validate_staging = self._validate_staging

    @staticmethod
    def _validate_staging(stg_info: StgInfo, sys_info: Dv2SystemInfo) -> None:
        """Check that a staging object fits the `link2fact` template.

        Args:
            stg_info: Description of the staging object; `hk_keys`, `bk_keys`,
                `stg_name` and `degenerate_field` are read.
            sys_info: System description; `entities_list` supplies the names
                of the entities that foreign hk columns may refer to.

        Raises:
            Exception: If there are fewer than 2 or more than 3 hk columns,
                the degenerate field is missing, the own `hk_<stg_name>`
                column is missing, a foreign hk column does not match any
                entity from `sys_info`, or any bk column is present.
        """
        # -----------------
        # hk
        # -----------------
        hk_count = len(stg_info.hk_keys)
        stg_full_name = f"stg.{stg_info.stg_name}"

        # Only 2 or 3 hk columns are accepted for now: the own one plus one or two foreign ones.
        if hk_count < 2:  # own and at least one foreign
            raise Exception(f"At least 2 hk columns expected in `{stg_full_name}` for the `link2fact` template")
        if hk_count > 3:
            raise Exception(f"{hk_count} hk columns in `{stg_full_name}` for the `link2fact` template?! Are you sure?")
        if stg_info.degenerate_field is None:
            raise Exception(f"Degenerate field is required for `{stg_full_name}` for the `link2fact` template")

        # The link's own hash key must be named after the staging object itself.
        expected_own_hk_column_name = f"hk_{stg_info.stg_name}"
        if expected_own_hk_column_name not in stg_info.hk_keys:
            raise Exception(f"Column `{expected_own_hk_column_name}` has not been found in `{stg_full_name}`")

        # Every other hk column must be `hk_[name]` where `name` is an existing entity.
        # A set lookup (instead of list.remove() inside a nested loop) keeps the check
        # correct even when the same entity name occurs more than once in entities_list.
        known_hk_names = {f"hk_{en.entity_name}" for en in sys_info.entities_list}
        unrelated_hk_names = [
            hk_name
            for hk_name in stg_info.hk_keys
            if hk_name != expected_own_hk_column_name and hk_name not in known_hk_names
        ]
        if len(unrelated_hk_names) > 0:
            # Joined outside the f-string: reusing the same quote character inside an
            # f-string replacement field is only valid syntax on Python 3.12+.
            unrelated_columns = ", ".join(unrelated_hk_names)
            raise Exception(f"There are no entities related to `{unrelated_columns}` columns in the `{stg_full_name}`")

        # -----------------
        # BKs
        # -----------------
        # TODO: this should become a warning rather than an error.
        if len(stg_info.bk_keys) > 0:
            raise Exception("You dont need bk columns for the `link2fact` template")
62
+
63
+
64
class Link2FactSchemaGenerator(SchemaGenerator):
    """Builds the tables and stored procedures for the `link2fact` template.

    Table definitions come from the validation result; procedure code is
    delegated to the dialect handler.
    """

    def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
        self.dialect_handler = dialect_handler
        self._validation_result = validation_result

    def make_tables(self) -> dict[str, Table]:
        """Return the generated tables keyed by their role ("link" and "sat")."""
        return {
            "link": self.make_link_table(),
            "sat": self.make_sat_table(),
            # The fact table is currently left out: make_fact_table() is not wired in.
        }

    def _append_audit_columns(self, table: Table) -> None:
        """Append the non-nullable LoadDate and RecordSource system columns to `table`."""
        system_column_types = self._validation_result.system_column_types
        for column_name in (modeling_metadata.loaddate, modeling_metadata.recordsource):
            table.append_column(Column(column_name, system_column_types[column_name], nullable=False))

    def make_link_table(self) -> Table:
        """Build the table in the `link` schema.

        The own hash key (`hk_<entity_name>`) is the primary key. All other
        hash keys together with the degenerate field are non-nullable and form
        one unique constraint. LoadDate and RecordSource are appended last.
        """
        entity_name = self._validation_result.entity_name
        own_hk_name = f"hk_{entity_name}"

        link_table = Table(entity_name, MetaData(), schema="link")
        unique_column_names: list[str] = []

        # HKs (own and FKs); each hk key is a (name, type) pair
        for hk_name, hk_type in self._validation_result.hk_keys:
            if hk_name == own_hk_name:
                link_table.append_column(Column(hk_name, hk_type, primary_key=True))
            else:
                unique_column_names.append(hk_name)
                link_table.append_column(Column(hk_name, hk_type, nullable=False))

        # The degenerate field is part of the unique key alongside the foreign HKs.
        dg_name, dg_type = self._validation_result.degenerate_field
        link_table.append_column(Column(dg_name, dg_type, nullable=False))
        unique_column_names.append(dg_name)
        link_table.append_constraint(UniqueConstraint(*unique_column_names))

        self._append_audit_columns(link_table)
        return link_table

    def make_sat_table(self) -> Table:
        """Build the table in the `sat` schema.

        Columns, in order: the own hash key (primary key), the degenerate
        field when one is set, LoadDate, RecordSource, then every business
        column as nullable.
        """
        entity_name = self._validation_result.entity_name
        own_hk_name = f"hk_{entity_name}"

        sat_table = Table(entity_name, MetaData(), schema="sat")

        # own HK only
        for hk_name, hk_type in self._validation_result.hk_keys:
            if hk_name == own_hk_name:
                sat_table.append_column(Column(hk_name, hk_type, primary_key=True))

        # for transactional links
        dg_key = self._validation_result.degenerate_field
        if dg_key is not None:
            sat_table.append_column(Column(dg_key[0], dg_key[1], nullable=False))

        self._append_audit_columns(sat_table)

        for name_, type_ in self._validation_result.business_column_types.items():
            sat_table.append_column(Column(name_, type_, nullable=True))

        return sat_table

    def make_fact_table(self) -> Table:
        """Build the table in the `fact` schema.

        Every hash key except the own one becomes a primary-key column;
        business columns follow as nullable. This table is not part of
        `make_tables()` at the moment.
        """
        entity_name = self._validation_result.entity_name
        own_hk_name = f"hk_{entity_name}"

        fact_table = Table(entity_name, MetaData(), schema="fact")

        # not own HKs only
        for hk_name, hk_type in self._validation_result.hk_keys:
            if hk_name != own_hk_name:
                fact_table.append_column(Column(hk_name, hk_type, primary_key=True))

        for name_, type_ in self._validation_result.business_column_types.items():
            fact_table.append_column(Column(name_, type_, nullable=True))

        return fact_table

    def make_procedures(self, tables: dict[str, Table],
                        entity_registration_date: datetime | None = None
                        ) -> dict[str, Tuple[str, str, str]]:
        """Generate the stored procedures for the entity.

        Args:
            tables: Tables as returned by `make_tables()`; the "link" and
                "sat" entries are used.
            entity_registration_date: Value for the header's `created_on`
                field. When omitted, the time of this call is used. (A
                `datetime.now()` default in the signature would be evaluated
                only once, when the class is defined.)

        Returns:
            A dict keyed by "stg" (proxy staging only), "link", "sat", "job"
            and "drop"; each value is the 3-tuple returned by the dialect
            handler: (procedure code, procedure name, call statement).
        """
        result = self._validation_result
        is_proxy_stg = result.stg_schema == "proxy"
        procedures = {}

        if entity_registration_date is None:
            entity_registration_date = datetime.now()

        header = modeling_metadata.HEADER_TEMPLATE.format(
            created_on=entity_registration_date,
            updated_on=datetime.now(),
            version=SANDWICH_VERSION,
            entity_name=result.entity_name
        )

        # staging materialization procedure (only when staging is fed from `proxy`)
        stg_proc_name = None
        if is_proxy_stg:
            stg_proc_code, stg_proc_name, stg_call_stmt = self.dialect_handler.make_stg_materialization_proc(
                entity_name=result.entity_name,
                header=header
            )
            procedures["stg"] = (stg_proc_code, stg_proc_name, stg_call_stmt)

        # link procedure: the degenerate field is passed along with the hash keys
        link_proc_code, link_proc_name, link_call_stmt = self.dialect_handler.make_link_proc(
            link_table=tables["link"],
            hk_keys=result.hk_keys + [result.degenerate_field],
            header=header
        )
        procedures["link"] = (link_proc_code, link_proc_name, link_call_stmt)

        # sat procedure
        sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_scd0_sat_proc(
            sat_table=tables["sat"],
            header=header
        )
        procedures["sat"] = (sat_proc_code, sat_proc_name, sat_call_stmt)

        # job procedure
        job_proc_names = []  # order-sensitive
        if is_proxy_stg:
            job_proc_names.append(stg_proc_name)
        job_proc_names.extend([link_proc_name, sat_proc_name])
        job_proc_code, job_proc_name, job_call_stmt = self.dialect_handler.make_job_proc(
            entity_name=result.entity_name,
            proc_names=job_proc_names,
            header=header
        )
        procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)

        # drop procedure
        drop_table_schemas = ["link", "sat"]
        # The job procedure is listed as well, mirroring the scd2dim generator,
        # so that it is covered by the drop procedure too.
        drop_proc_names = [job_proc_name, link_proc_name, sat_proc_name]
        if is_proxy_stg:
            drop_table_schemas.append("stg")
            drop_proc_names.append(stg_proc_name)
        drop_proc_code, drop_proc_name, drop_call_stmt = self.dialect_handler.make_drop_proc(
            entity_name=result.entity_name,
            table_schemas=drop_table_schemas,
            procedures=drop_proc_names,
            header=header
        )
        procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)

        return procedures
@@ -3,22 +3,20 @@ from typing import Iterator, Tuple
3
3
 
4
4
  from sqlalchemy import Column, MetaData, Table, UniqueConstraint
5
5
 
6
- from src.sandwich import SANDWICH_VERSION
7
- from src.sandwich.dialects.base import DialectHandler
8
- from src.sandwich.modeling import modeling_metadata, StgInfo
6
+ from sandwich import SANDWICH_VERSION
7
+ from sandwich.dialects.base import DialectHandler
8
+ from sandwich.modeling import modeling_metadata, StgInfo, Dv2SystemInfo
9
9
 
10
- from .base import Validator, SchemaGenerator, ValidationResult
10
+ from .base import BaseValidator, SchemaGenerator, ValidationResult
11
11
 
12
12
 
13
- class Scd2DimValidator(Validator):
14
-
15
- def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> ValidationResult:
16
- """Validate staging table or view for `scd2dim` template.
17
-
18
- Raises: Exception"""
19
- if verbose:
20
- raise Exception("verbose is not implemented yet")
13
+ class Scd2DimValidator(BaseValidator):
14
+ def __init__(self, template: str):
15
+ super().__init__(template)
16
+ self._on_validate_staging = self._validate_staging
21
17
 
18
+ @staticmethod
19
+ def _validate_staging(stg_info: StgInfo, _: Dv2SystemInfo) -> None:
22
20
  # -----------------
23
21
  # hk
24
22
  # -----------------
@@ -47,40 +45,21 @@ class Scd2DimValidator(Validator):
47
45
 
48
46
  system_column_names = stg_info.sys_columns.keys()
49
47
 
50
- # universal check - all dv2 raw objects should be auditable
51
- for required_col in modeling_metadata.required_columns:
52
- if required_col not in system_column_names:
53
- raise Exception(f"{required_col} column is required")
54
-
55
- # scd2dim specific validations
56
48
  if modeling_metadata.hashdiff not in system_column_names:
57
49
  raise Exception(f"{modeling_metadata.hashdiff} column is required for scd2dim validation")
58
50
  if modeling_metadata.is_available not in system_column_names:
59
51
  raise Exception(f"{modeling_metadata.is_available} column is required for scd2dim validation")
60
52
 
61
- return ValidationResult(
62
- stg_schema=stg_info.stg_schema,
63
- entity_name=stg_info.stg_name,
64
- bk_keys=[(nm, tp) for nm, tp in stg_info.bk_keys.items()],
65
- hk_key=hk_key,
66
- business_column_types=stg_info.bus_columns,
67
- system_column_types=stg_info.sys_columns,
68
- )
69
-
70
53
  class Scd2DimSchemaGenerator(SchemaGenerator):
71
54
 
72
55
  def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
73
56
  self.dialect_handler = dialect_handler
74
57
  self._validation_result = validation_result
75
58
 
76
- @property
77
- def entity_info(self) -> ValidationResult:
78
- return self._validation_result
79
-
80
59
  def make_tables(self) -> dict[str, Table]:
81
60
  entity_name = self._validation_result.entity_name
82
61
  bk_keys = self._validation_result.bk_keys
83
- hk_key = self._validation_result.hk_key
62
+ hk_key = self._validation_result.hk_keys[0]
84
63
  business_column_types = self._validation_result.business_column_types
85
64
  system_column_types = self._validation_result.system_column_types
86
65
 
@@ -199,9 +178,9 @@ class Scd2DimSchemaGenerator(SchemaGenerator):
199
178
 
200
179
  # Generate sat procedure
201
180
  sat_table = tables["sat"]
202
- sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_sat_proc(
181
+ sat_proc_code, sat_proc_name, sat_call_stmt = self.dialect_handler.make_scd2_sat_proc(
203
182
  sat_table=sat_table,
204
- hk_name=self._validation_result.hk_key[0],
183
+ hk_name=self._validation_result.hk_keys[0][0],
205
184
  hashdiff_col=modeling_metadata.hashdiff,
206
185
  is_available_col=modeling_metadata.is_available,
207
186
  loaddate_col=modeling_metadata.loaddate,
@@ -212,7 +191,7 @@ class Scd2DimSchemaGenerator(SchemaGenerator):
212
191
 
213
192
  # Generate dim procedure
214
193
  dim_table = tables["dim"]
215
- dim_proc_code, dim_proc_name, dim_call_stmt = self.dialect_handler.make_dim_scd2_proc(
194
+ dim_proc_code, dim_proc_name, dim_call_stmt = self.dialect_handler.make_scd2_dim_proc(
216
195
  dim_table=dim_table,
217
196
  bk_keys=self._validation_result.bk_keys,
218
197
  header=header
@@ -220,25 +199,28 @@ class Scd2DimSchemaGenerator(SchemaGenerator):
220
199
  procedures["dim"] = (dim_proc_code, dim_proc_name, dim_call_stmt)
221
200
 
222
201
  # Generate job procedure
202
+ job_proc_names = [] # order-sensitive
203
+ if self._validation_result.stg_schema == "proxy":
204
+ job_proc_names.append(stg_proc_name)
205
+ job_proc_names.extend([hub_proc_name, sat_proc_name, dim_proc_name])
223
206
  job_proc_code, job_proc_name, job_call_stmt = self.dialect_handler.make_job_proc(
224
207
  entity_name=self._validation_result.entity_name,
225
- hub_proc_name=hub_proc_name,
226
- sat_proc_name=sat_proc_name,
227
- dim_proc_name=dim_proc_name,
228
- stg_proc_name=stg_proc_name,
208
+ proc_names=job_proc_names,
229
209
  header=header
230
210
  )
231
211
  procedures["job"] = (job_proc_code, job_proc_name, job_call_stmt)
232
212
 
233
213
  # Generate drop procedure
214
+ drop_table_schemas = ["hub", "sat", "dim"]
215
+ if self._validation_result.stg_schema == "proxy":
216
+ drop_table_schemas.append("stg")
217
+ drop_proc_names = [job_proc_name, hub_proc_name, sat_proc_name, dim_proc_name]
218
+ if self._validation_result.stg_schema == "proxy":
219
+ drop_proc_names.append(stg_proc_name)
234
220
  drop_proc_code, drop_proc_name, drop_call_stmt = self.dialect_handler.make_drop_proc(
235
221
  entity_name=self._validation_result.entity_name,
236
- stg_schema=self._validation_result.stg_schema,
237
- job_proc_name=job_proc_name,
238
- stg_proc_name=stg_proc_name,
239
- hub_proc_name=hub_proc_name,
240
- sat_proc_name=sat_proc_name,
241
- dim_proc_name=dim_proc_name,
222
+ table_schemas=drop_table_schemas,
223
+ procedures=drop_proc_names,
242
224
  header=header
243
225
  )
244
226
  procedures["drop"] = (drop_proc_code, drop_proc_name, drop_call_stmt)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: sandwich
3
- Version: 0.2.2
3
+ Version: 0.3.0
4
4
  Summary: DataVault 2.0 code gen
5
5
  Keywords: DWH,Data Vault 2.0
6
6
  Author: Andrey Morozov
@@ -124,14 +124,14 @@ select cast(31 as bigint) [bk_id]
124
124
  | | | | DateTo |
125
125
 
126
126
  ### link2fact profile columns mapping:
127
- | stg | link | sat | fact |
128
- |--------------------|------------------------|----------------------------|------|
129
- | HKs... | (uk)(fk)HKs... | | |
130
- | hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
131
- | degenerate_field | (uk)degenerate_field | degenerate_field | |
132
- | LoadDate | LoadDate | LoadDate | |
133
- | RecordSource | RecordSource | RecordSource | |
134
- | FLDs... | | FLDs... | |
127
+ | stg | link | sat | fact |
128
+ |--------------------|--------------------------------|----------------------------|------|
129
+ | HKs... | (uk)(fk)hk_`other_entity_name` | | |
130
+ | hk_`[entity_name]` | (pk)hk_`[entity_name]` | (pk)(fk)hk_`[entity_name]` | |
131
+ | <degenerate_field> | (uk)<degenerate_field> | <degenerate_field> | |
132
+ | LoadDate | LoadDate | LoadDate | |
133
+ | RecordSource | RecordSource | RecordSource | |
134
+ | FLDs... | | FLDs... | |
135
135
 
136
136
 
137
137
  ### Schemas:
@@ -147,11 +147,13 @@ select cast(31 as bigint) [bk_id]
147
147
  * `proxy` - source data for a materialized staging area (meant for wrapping external data sources as SQL views)
148
148
 
149
149
  ### DV2-related schemas layering
150
- | LoB* | staging | raw vault | business vault | information vault |
151
- |-------|---------|-----------|----------------|-------------------|
152
- | proxy | stg | hub | sal | dim |
153
- | | | sat | | fact |
154
- | | | link | | |
150
+ data -> ELT -> report
151
+
152
+ | LoB* data | staging (E) | raw vault (L) | business vault (T) | information vault |
153
+ |-----------|-------------|---------------|--------------------|-------------------|
154
+ | | stg | hub | sal | dim |
155
+ | | proxy | sat | | fact |
156
+ | | pool | link | | |
155
157
  _* Line of Business applications_
156
158
 
157
159
  ### Usage diagram
@@ -174,4 +176,4 @@ _* Line of Business applications_
174
176
  + +--------+ +---------------+
175
177
  +
176
178
 
177
- ```
179
+ ```
@@ -0,0 +1,23 @@
1
+ sandwich/__init__.py,sha256=DiQSmvml9OXujAYHILR4jz8UjoxbMvxFgRIlsdRza1E,80
2
+ sandwich/dialects/__init__.py,sha256=zQ4oigT3yqjZyl_IL_Tc-GmyoJar_Oqj_bGyiRVdSjg,415
3
+ sandwich/dialects/base.py,sha256=5wRh4T4e-SJbB-8eYtyQhd01HG-c0-x52tda7HFzSGs,3759
4
+ sandwich/dialects/ddl_mssql.py,sha256=VkwkJ373hDQNe2y9d9M3YJCdeCJmleUzk1hxvJjEhbA,3362
5
+ sandwich/dialects/ddl_postgres.py,sha256=Mdk2dAcSp8Nwc0p98nCnRtQ3vT09lH-cJVa6SdLLuwg,2995
6
+ sandwich/dialects/factory.py,sha256=-mpWGKp8NRmTFCXVlhbTsGhR0oAwOEdm-xnBolwWrQo,911
7
+ sandwich/dialects/mssql.py,sha256=laSPSfZR38aP3DBDHxv0YKwXC60kevVpyqjUmVu1xhk,10226
8
+ sandwich/dialects/postgres.py,sha256=8wVuqKqVxT0Mch6opK81qAyP-UaVDz5OvCuC5LEbe-U,4030
9
+ sandwich/dialects/utils.py,sha256=mdFp5jkJquhnYB3wiRjRunsGcHtmpcbjBzzgONfbM7w,5774
10
+ sandwich/dwh/__init__.py,sha256=e1Pev1TtNBO0rikHY8GrkfjcSdu8ow7xadlqvpiIHwE,4157
11
+ sandwich/errors.py,sha256=kIJmYbUf9wOnshJbFHwhqxZ3qEEdVtOy5Dcb2bSdnAk,872
12
+ sandwich/main.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ sandwich/modeling/__init__.py,sha256=owIplBqXoHBoiQ2D_zmckaFsXcXEl4kgT3HwBqPuLUU,4091
14
+ sandwich/modeling/strategies/__init__.py,sha256=VePgBfvPCLl4_GJIWlbN-VbWjB7iD0ztPjMYuzIsi3U,403
15
+ sandwich/modeling/strategies/base.py,sha256=_5mWrQEmyx5BESIxaZ6eC7GZjCoFQNJ9p0gXqDfGBVA,3583
16
+ sandwich/modeling/strategies/factory.py,sha256=Lc1MugJ-3g335lcXb3H4mVJgD0toDTYR-dmsK6-n1Yw,1598
17
+ sandwich/modeling/strategies/link2fact.py,sha256=KPaccOWqMmpiXA6U9Pb7IAhSQAl-xN3QaiX4jVcWRIY,9229
18
+ sandwich/modeling/strategies/scd2dim.py,sha256=qHlMj800lk6W7ipT9LsM_sND-EqYVttE5qIqEGGHmk8,10194
19
+ sandwich/py.typed,sha256=70pF0eMpuZgOyb0zFSE07ugId_AoU5z6CpLlVfg3pik,34
20
+ sandwich-0.3.0.dist-info/WHEEL,sha256=YUH1mBqsx8Dh2cQG2rlcuRYUhJddG9iClegy4IgnHik,79
21
+ sandwich-0.3.0.dist-info/entry_points.txt,sha256=0GSrDEOq9Qo5CwxppoVq-HNcILK65PchNmI6J0ripq8,44
22
+ sandwich-0.3.0.dist-info/METADATA,sha256=KccsRqwgn-UPVEDAUkmfekTHaYw3SVRlJW-0RByVnAM,9127
23
+ sandwich-0.3.0.dist-info/RECORD,,
sandwich/dv2_helper.py DELETED
@@ -1,98 +0,0 @@
1
- from datetime import datetime
2
-
3
- from sqlalchemy import Connection, Engine, MetaData, Table, select, text
4
-
5
- from src.sandwich.dialects import DialectHandlerFactory
6
- from src.sandwich.modeling import get_stg_info, infer_template
7
- from src.sandwich.strategies import StrategyFactory
8
-
9
- from . import errors as err
10
-
11
-
12
- class Dv2Helper:
13
-
14
- def __init__(self, stg: Table, dialect: str = "mssql", template: str | None = None):
15
- stg_info = get_stg_info(stg)
16
- if template is None:
17
- template = infer_template(stg_info)
18
- if template not in ("scd2dim", "link2fact"):
19
- raise ValueError(
20
- f"Template '{template}' is not supported. Supported templates: scd2dim, link2fact"
21
- )
22
-
23
- # legacy fields
24
- self.stg_table = stg
25
- self.entity_name = stg.name
26
- self.template = template
27
-
28
- self.dialect = dialect
29
- self.dialect_handler = DialectHandlerFactory.create_handler(dialect)
30
- self.validator = StrategyFactory.create_validator(template)
31
- self.validation_result = self.validator.validate_staging(stg_info)
32
- self.schema_generator = StrategyFactory.create_generator(template,
33
- self.dialect_handler,
34
- self.validation_result)
35
-
36
- # Convenience properties (for backward compatibility if needed)
37
- self.bk_keys = self.validation_result.bk_keys
38
- self.hk_key = self.validation_result.hk_key
39
- self.business_column_types = self.validation_result.business_column_types
40
- self.system_column_types = self.validation_result.system_column_types
41
-
42
- def call_register_entity(self, conn: Engine | Connection) -> datetime:
43
- if self.dialect == "mssql":
44
- call_stmt = "exec core.[register_entity] :entity_name, :template"
45
- elif self.dialect == "postgres":
46
- call_stmt = "call core.register_entity (:entity_name, :template)"
47
- else:
48
- raise err.Dv2NotYetImplementedForDialectError(self.dialect)
49
-
50
- conn.execute(
51
- text(call_stmt),
52
- {
53
- "entity_name": self.entity_name,
54
- "template": self.template,
55
- })
56
-
57
- entities = Table("entities", MetaData(), schema="core", autoload_with=conn)
58
- stmt = select(entities.c.created).where(self.entity_name == entities.c.entity_name)
59
- return conn.execute(stmt).scalar_one()
60
-
61
- # def call_job_proc(self, conn: Engine | Connection, parent_execution_id: int = -1) -> None:
62
- # job_proc_name = self.schema_generator.get_job_proc_name(self.entity_name, self.dialect)
63
- #
64
- # if self.dialect == "mssql":
65
- # call_stmt = f"exec {job_proc_name} :parent_executionID"
66
- # else:
67
- # raise err.Dv2NotYetImplementedForDialectError(self.dialect)
68
- #
69
- # conn.execute(text(call_stmt), {"parent_executionID": parent_execution_id})
70
-
71
- def generate_schema(self, conn: Engine | Connection, verbose: bool = False) -> None:
72
- registered_on = self.call_register_entity(conn)
73
- if verbose:
74
- print(f"[ok] Registered `{self.entity_name}` for `{self.template}`")
75
-
76
- tables = self.schema_generator.make_tables()
77
- for table_type, table in tables.items():
78
- if table is not None:
79
- table.create(conn, checkfirst=True)
80
- if verbose:
81
- print(f"[ok] Created table [{table.schema}].[{table.name}]")
82
-
83
- procedures = self.schema_generator.make_procedures(tables, registered_on)
84
- for proc_type, (proc_code, proc_name, _) in procedures.items():
85
- conn.execute(text(proc_code))
86
- if verbose:
87
- print(f"[ok] Created or altered {proc_name}")
88
-
89
- @classmethod
90
- def update_registered_entities(cls, conn: Engine | Connection, dialect: str = "mssql",
91
- verbose: bool = False):
92
- metadata = MetaData()
93
- entities = Table("entities", metadata, schema="core", autoload_with=conn)
94
- select_result = conn.execute(entities.select().where(~entities.c.is_deleted))
95
- for row in select_result.mappings().all():
96
- stg = Table(row["entity_name"], metadata, schema="stg", autoload_with=conn)
97
- dv2 = cls(stg, dialect=dialect, template=row["template"])
98
- dv2.generate_schema(conn, verbose=verbose)
@@ -1,44 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from datetime import datetime
3
- from typing import Any, Tuple
4
-
5
- from sqlalchemy import Table
6
-
7
- from src.sandwich.modeling import StgInfo
8
-
9
- class ValidationResult:
10
- def __init__(self, stg_schema: str, entity_name: str
11
- , bk_keys: list[Tuple[str, Any]]
12
- , hk_key: Tuple[str, Any]
13
- , business_column_types: dict[str, Any]
14
- , system_column_types: dict[str, Any]):
15
- self.stg_schema = stg_schema
16
- self.entity_name = entity_name
17
- self.bk_keys = bk_keys
18
- self.hk_key = hk_key
19
- self.business_column_types = business_column_types
20
- self.system_column_types = system_column_types
21
-
22
- class Validator(ABC):
23
- @abstractmethod
24
- def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> ValidationResult:
25
- pass
26
-
27
-
28
- class SchemaGenerator(ABC):
29
- @property
30
- @abstractmethod
31
- def entity_info(self) -> ValidationResult: ...
32
-
33
- @abstractmethod
34
- def make_tables(self) -> dict[str, Table]:
35
- pass
36
-
37
- @abstractmethod
38
- def make_procedures(
39
- self,
40
- tables: dict[str, Table],
41
- entity_registration_date: datetime = datetime.now()
42
- ) -> dict[str, Tuple[str, str, str]]:
43
- pass
44
-
@@ -1,91 +0,0 @@
1
- """Link to Fact strategy implementations."""
2
- from datetime import datetime
3
- from typing import Tuple
4
-
5
- from sqlalchemy import Table
6
-
7
- from src.sandwich.dialects.base import DialectHandler
8
-
9
- from .base import Validator, SchemaGenerator, ValidationResult
10
-
11
-
12
- class Link2FactValidator(Validator):
13
-
14
- def validate_staging(self, stg_info: StgInfo, verbose: bool = False) -> dict:
15
- """Validate staging table for link2fact mode."""
16
- if verbose:
17
- raise Exception("verbose is not implemented yet")
18
-
19
- # TODO: Implement link2fact specific validation logic
20
- # This will likely be different from scd2dim validation
21
- # For example: checking for link keys, fact columns, etc.
22
-
23
- bk_keys = []
24
- hk_key = None
25
- business_column_types = {}
26
- system_column_types = {}
27
- link_keys = [] # New concept for link2fact
28
- fact_columns = [] # New concept for link2fact
29
-
30
- # Placeholder validation logic
31
- for col in stg_table.columns.values():
32
- # TODO: Implement column classification for link2fact mode
33
- pass
34
-
35
- return {
36
- "stg_schema": stg_table.schema,
37
- "entity_name": stg_table.name,
38
- "bk_keys": bk_keys,
39
- "hk_key": hk_key,
40
- "business_column_types": business_column_types,
41
- "system_column_types": system_column_types,
42
- "link_keys": link_keys,
43
- "fact_columns": fact_columns,
44
- }
45
-
46
-
47
- class Link2FactSchemaGenerator(SchemaGenerator):
48
-
49
- def __init__(self, dialect_handler: DialectHandler, validation_result: ValidationResult):
50
- self.dialect_handler = dialect_handler
51
- self._validation_result = validation_result
52
-
53
- @property
54
- def entity_info(self) -> ValidationResult:
55
- return self._validation_result
56
-
57
- def make_tables(self) -> dict[str, Table]:
58
- """Create link and fact tables for link2fact mode."""
59
- # TODO: Implement link2fact table creation
60
- # This will create different table structures than scd2dim
61
- # For example: link table, fact table (instead of hub/sat/dim)
62
-
63
- entity_name = self._validation_result.entity_name
64
-
65
- # Placeholder - actual implementation needed
66
- link_table: Table | None = None
67
- fact_table: Table | None = None
68
-
69
- return {
70
- "link": link_table,
71
- "fact": fact_table,
72
- }
73
-
74
- def make_procedures(
75
- self,
76
- tables: dict[str, Table],
77
- entity_registration_date: datetime,
78
- ) -> dict[str, Tuple[str, str]]:
79
- """Generate procedures for link2fact mode."""
80
- procedures = {}
81
-
82
- # TODO: Implement link2fact procedure generation using dialect_handler
83
- # This will generate different procedures than scd2dim
84
- # For example: link population, fact population, aggregation logic, etc.
85
-
86
- # When implementing, use self.dialect_handler methods to generate SQL
87
- # Example:
88
- # link_proc_code, link_proc_name = self.dialect_handler.make_link_proc(...)
89
- # procedures["link"] = (link_proc_code, link_proc_name)
90
-
91
- return procedures