dg-kit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dg_kit/__init__.py +0 -0
- dg_kit/base/__init__.py +0 -0
- dg_kit/base/business_information.py +60 -0
- dg_kit/base/convention.py +57 -0
- dg_kit/base/data_catalog.py +7 -0
- dg_kit/base/dataclasses/__init__.py +7 -0
- dg_kit/base/dataclasses/business_information.py +77 -0
- dg_kit/base/dataclasses/convention.py +30 -0
- dg_kit/base/dataclasses/data_catalog.py +64 -0
- dg_kit/base/dataclasses/logical_model.py +86 -0
- dg_kit/base/dataclasses/physical_model.py +38 -0
- dg_kit/base/enums.py +13 -0
- dg_kit/base/logical_model.py +66 -0
- dg_kit/base/physical_model.py +41 -0
- dg_kit/integrations/__init__.py +0 -0
- dg_kit/integrations/dbt/README.md +27 -0
- dg_kit/integrations/dbt/__init__.py +0 -0
- dg_kit/integrations/dbt/parser.py +202 -0
- dg_kit/integrations/notion/README.md +38 -0
- dg_kit/integrations/notion/__init__.py +0 -0
- dg_kit/integrations/notion/api.py +495 -0
- dg_kit/integrations/notion/formater.py +65 -0
- dg_kit/integrations/odm/README.md +46 -0
- dg_kit/integrations/odm/__init__.py +0 -0
- dg_kit/integrations/odm/attr_types.py +6 -0
- dg_kit/integrations/odm/parser.py +490 -0
- dg_kit-0.1.0.dist-info/METADATA +99 -0
- dg_kit-0.1.0.dist-info/RECORD +29 -0
- dg_kit-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
import xml.etree.ElementTree as ET
|
|
4
|
+
|
|
5
|
+
from typing import Mapping, Optional, Set, Tuple
|
|
6
|
+
|
|
7
|
+
from dg_kit.base.logical_model import LogicalModelsDatabase
|
|
8
|
+
from dg_kit.base.business_information import BusinessInformationDatabase
|
|
9
|
+
|
|
10
|
+
from dg_kit.base.logical_model import LogicalModel
|
|
11
|
+
from dg_kit.base.dataclasses.logical_model import (
|
|
12
|
+
EntityIdentifier,
|
|
13
|
+
Entity,
|
|
14
|
+
Attribute,
|
|
15
|
+
Relation,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
from dg_kit.base.business_information import BusinessInformation
|
|
19
|
+
from dg_kit.base.dataclasses.business_information import (
|
|
20
|
+
Contact,
|
|
21
|
+
Team,
|
|
22
|
+
Email,
|
|
23
|
+
Url,
|
|
24
|
+
Document,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from dg_kit.integrations.odm.attr_types import ODMAttributeTypesMapping
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ODMParser:
|
|
31
|
+
def __init__(self, odm_project_path: Path):
    """
    Prepare a parser for a single Oracle Data Modeler project.

    Args:
        odm_project_path: Path (or path-like string) of the project's ``.dmd``
            file. The project assets are expected in a sibling folder named
            after the file's stem.

    Raises:
        ValueError: If the path is not an existing file or its name does not
            end with ``.dmd``.
        FileNotFoundError: If the sibling assets folder does not exist.
    """
    if not isinstance(odm_project_path, Path):
        odm_project_path = Path(odm_project_path)

    # Reject the path when EITHER check fails. The previous `and` only rejected
    # paths failing both checks, so an existing non-.dmd file (or a missing
    # .dmd file) slipped through this validation.
    if not odm_project_path.is_file() or not odm_project_path.name.endswith(".dmd"):
        raise ValueError(
            f"odm_project_path must be a valid Oracle Data Modeler project (.dmd) file, got: {odm_project_path}"
        )

    self.odm_project_path = odm_project_path

    # The model is named after the .dmd file; its assets live in a sibling
    # folder carrying the same name.
    self.model_name = self.odm_project_path.stem
    self.model_assets_path = self.odm_project_path.parent / self.model_name

    if not self.model_assets_path.is_dir():
        raise FileNotFoundError(
            f"Expected project folder named '{self.model_name}' next to {self.model_name}.dmd, "
            f"but assets folder {self.model_assets_path} not found. "
            "ODM project is corrupted!"
        )

    # Locations of the logical model units inside the assets folder.
    self.logical_model_path = self.model_assets_path / "logical"
    self.entites_path = self.logical_model_path / "entity"
    self.relations_path = self.logical_model_path / "relation"

    # Locations of the business information units inside the assets folder.
    self.business_information_path = self.model_assets_path / "businessinfo"
    self.contacts_path = self.business_information_path / "contact"
    self.documents_path = self.business_information_path / "document"
    self.emails_path = self.business_information_path / "email"
    self.urls_path = self.business_information_path / "url"
    self.parties_path = self.business_information_path / "party"

    self.LM = LogicalModel(self.model_name)
    self.BI = BusinessInformation(self.model_name)

    # Lookup tables keyed by the ODM XML ``id`` attribute; they are filled
    # while parsing and used to resolve cross references between units.
    self.LM.all_lm_units_by_odm_id = {}
    self.BI.all_bi_units_by_odm_id = {}
|
|
69
|
+
|
|
70
|
+
def _parse_responsible_parties(self, elem: ET.Element) -> Optional[Set[str]]:
|
|
71
|
+
parties = tuple(
|
|
72
|
+
[
|
|
73
|
+
self.BI.all_bi_units_by_odm_id[p.text]
|
|
74
|
+
for p in elem.findall("./responsibleParties/party")
|
|
75
|
+
]
|
|
76
|
+
)
|
|
77
|
+
return parties or tuple()
|
|
78
|
+
|
|
79
|
+
def _parse_documents(self, elem: ET.Element) -> Optional[Set[str]]:
|
|
80
|
+
docs_elem = elem.find("./documents")
|
|
81
|
+
|
|
82
|
+
if docs_elem is None:
|
|
83
|
+
return tuple()
|
|
84
|
+
|
|
85
|
+
documents_ids = docs_elem.attrib.get("usedDucuments").split(" ")
|
|
86
|
+
documents = tuple(
|
|
87
|
+
[
|
|
88
|
+
self.BI.all_bi_units_by_odm_id[document_id]
|
|
89
|
+
for document_id in documents_ids
|
|
90
|
+
]
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
return documents
|
|
94
|
+
|
|
95
|
+
def _parse_dt_utc(self, s: Optional[str]) -> Optional[datetime]:
|
|
96
|
+
if not s:
|
|
97
|
+
return None
|
|
98
|
+
# Example: "2025-12-15 12:01:55 UTC"
|
|
99
|
+
s = s.strip()
|
|
100
|
+
if s.endswith(" UTC"):
|
|
101
|
+
s = s[:-4]
|
|
102
|
+
try:
|
|
103
|
+
return datetime.strptime(s, "%Y-%m-%d %H:%M:%S")
|
|
104
|
+
except ValueError:
|
|
105
|
+
return None
|
|
106
|
+
|
|
107
|
+
def _parse_bool(self, text: Optional[str]) -> Optional[bool]:
|
|
108
|
+
if text is None:
|
|
109
|
+
return None
|
|
110
|
+
t = text.strip().lower()
|
|
111
|
+
if t == "true":
|
|
112
|
+
return True
|
|
113
|
+
if t == "false":
|
|
114
|
+
return False
|
|
115
|
+
return None
|
|
116
|
+
|
|
117
|
+
def _parse_dynamic_properties(self, elem: ET.Element) -> dict[str, str]:
|
|
118
|
+
"""
|
|
119
|
+
Parse Oracle Data Modeler <propertyMap><property name="..." value="..."/></propertyMap>
|
|
120
|
+
and return a flat dict {name: value}.
|
|
121
|
+
|
|
122
|
+
Works for both Entity and Attribute XML elements.
|
|
123
|
+
- Ignores empty/missing names
|
|
124
|
+
- Keeps empty values as "" (so you can distinguish "present but blank")
|
|
125
|
+
- If duplicate names exist, the last one wins (matches typical override behavior)
|
|
126
|
+
"""
|
|
127
|
+
props: dict[str, str] = {}
|
|
128
|
+
|
|
129
|
+
prop_map = elem.find("./propertyMap")
|
|
130
|
+
if prop_map is None:
|
|
131
|
+
return props
|
|
132
|
+
|
|
133
|
+
for p in prop_map.findall("./property"):
|
|
134
|
+
name = (p.attrib.get("name") or "").strip()
|
|
135
|
+
if not name:
|
|
136
|
+
continue
|
|
137
|
+
value = p.attrib.get("value") or ""
|
|
138
|
+
props[name] = value
|
|
139
|
+
|
|
140
|
+
return props
|
|
141
|
+
|
|
142
|
+
def _parse_entity_identifiers(
    self, entity_id, elem: ET.Element
) -> Tuple[EntityIdentifier, ...]:
    """
    Build the identifiers declared under ``./identifiers/identifier``.

    Args:
        entity_id: Value stored as ``entity_id`` on every created identifier.
        elem: Entity XML element to read the identifiers from.

    Returns:
        A tuple of ``EntityIdentifier`` objects in document order with equal
        duplicates removed. ``<identifier>`` elements without an ``id``
        attribute are skipped.
    """
    identifiers = []

    for ident in elem.findall("./identifiers/identifier"):
        ident_id = ident.attrib.get("id")
        if not ident_id:
            continue

        # Accept the same spellings as _parse_bool ("true", " True ", ...);
        # anything else, including a missing <pk>, means "not a primary key".
        pk_txt = ident.findtext("./pk")
        is_pk = (pk_txt or "").strip().lower() == "true"

        used_attributes_ids = tuple(
            ref.text for ref in ident.findall("./usedAttributes/attributeRef")
        )

        identifiers.append(
            EntityIdentifier(
                natural_key=ident_id,
                name=ident.attrib.get("name"),
                is_pk=is_pk,
                entity_id=entity_id,
                used_attributes_ids=used_attributes_ids,
            )
        )

    # dict.fromkeys de-duplicates like the former set did, but keeps document
    # order, so the result no longer depends on hash ordering between runs.
    return tuple(dict.fromkeys(identifiers))
|
|
172
|
+
|
|
173
|
+
def _iter_xml_roots(self, units_path: Path):
    """
    Yield the XML root element of every unit file stored under ``units_path``.

    Units are expected one level down, inside segment sub-folders. A missing
    ``units_path`` yields nothing, entries of ``units_path`` that are not
    directories are skipped, and sub-directories inside a segment are skipped.
    Paths are visited in sorted order so repeated runs register units in the
    same order.
    """
    if not units_path.is_dir():
        return
    for seg in sorted(units_path.iterdir()):
        if not seg.is_dir():
            continue
        for unit_xml in sorted(seg.iterdir()):
            if unit_xml.is_file():
                yield ET.parse(unit_xml).getroot()

def parse_bi(self) -> BusinessInformation:
    """
    Parse the project's business information into ``self.BI``.

    Documents, emails and URLs are read first, then contacts (which reference
    emails and URLs by ODM id) and finally parties (which reference contacts).
    Every created unit is registered on ``self.BI`` and also stored in
    ``self.BI.all_bi_units_by_odm_id`` under its XML ``id`` attribute.

    A unit kind whose folder does not exist is treated as having no units.

    Returns:
        ``self.BI`` with all parsed units registered.

    Raises:
        KeyError: If a unit lacks the ``name``/``id`` attribute or references
            an ODM id that has not been parsed.
    """
    units_by_odm_id = self.BI.all_bi_units_by_odm_id

    # Documents
    for xml_root in self._iter_xml_roots(self.documents_path):
        document_dynamic_props = self._parse_dynamic_properties(xml_root)

        document = Document(
            natural_key=xml_root.attrib["name"],
            name=xml_root.attrib["name"],
            reference=document_dynamic_props.get("reference"),
        )

        self.BI.register_document(document)
        units_by_odm_id[xml_root.attrib["id"]] = document

    # Emails
    for xml_root in self._iter_xml_roots(self.emails_path):
        email = Email(
            natural_key=xml_root.attrib["name"],
            name=xml_root.attrib["name"],
            email_address=xml_root.findtext("emailAddress"),
        )

        self.BI.register_email(email)
        units_by_odm_id[xml_root.attrib["id"]] = email

    # URLs
    for xml_root in self._iter_xml_roots(self.urls_path):
        url = Url(
            natural_key=xml_root.attrib["name"],
            name=xml_root.attrib["name"],
            url=xml_root.findtext("url"),
        )

        self.BI.register_url(url)
        units_by_odm_id[xml_root.attrib["id"]] = url

    # Contacts (reference the emails and URLs parsed above)
    for xml_root in self._iter_xml_roots(self.contacts_path):
        emails = tuple(
            units_by_odm_id[ref.text] for ref in xml_root.findall("./emails/email")
        )
        # NOTE(review): the sibling lookups use singular child tags
        # ("./emails/email", "./contacts/contact"); "./urls/urls" may be a
        # typo for "./urls/url" - confirm against a real ODM contact file.
        urls = tuple(
            units_by_odm_id[ref.text] for ref in xml_root.findall("./urls/urls")
        )

        contact = Contact(
            natural_key=xml_root.attrib["name"],
            name=xml_root.attrib["name"],
            emails=emails,
            urls=urls,
        )

        self.BI.register_contact(contact)
        units_by_odm_id[xml_root.attrib["id"]] = contact

    # Parties (reference the contacts parsed above)
    for xml_root in self._iter_xml_roots(self.parties_path):
        contacts = tuple(
            units_by_odm_id[ref.text]
            for ref in xml_root.findall("./contacts/contact")
        )

        team = Team(
            natural_key=xml_root.attrib["name"],
            name=xml_root.attrib["name"],
            contacts=contacts,
        )

        self.BI.register_team(team)
        units_by_odm_id[xml_root.attrib["id"]] = team

    return self.BI
|
|
268
|
+
|
|
269
|
+
def parse_lm(self) -> LogicalModel:
    """
    Parse the project's entities, attributes and relations into ``self.LM``.

    Responsible parties and documents are resolved through
    ``self.BI.all_bi_units_by_odm_id``, so the business information they
    reference has to be present there before this method runs.

    Processing order:
        1. Entities with their own attributes. Attributes that carry a
           ``referedAttribute`` are not registered; they are remembered as
           dependencies of the entity instead.
        2. Relations, which look up their source/target entities by ODM id.
        3. The remembered dependencies, registered once every attribute is
           known.

    Returns:
        ``self.LM`` with all parsed units registered.
    """

    def split_csv(raw):
        # Comma-separated dynamic property -> tuple of its raw items (not
        # stripped); a missing or empty property -> empty tuple.
        return tuple(raw.split(",")) if raw else ()

    # entity ODM id -> ODM ids of attributes the entity refers to
    dependencies = {}

    for seg in self.entites_path.iterdir():
        for entity_xml in seg.iterdir():
            xml_root = ET.parse(entity_xml).getroot()

            entity_id = xml_root.attrib["id"]

            entity_dynamic_props = self._parse_dynamic_properties(xml_root)
            entity_identifiers = self._parse_entity_identifiers(entity_id, xml_root)
            entity_responsible_parties = tuple(
                self._parse_responsible_parties(xml_root)
            )
            entity_domain = entity_dynamic_props.get("domain")

            entity_linked_attribute_ids = []
            for attr_xml in xml_root.findall("./attributes/Attribute"):
                attribute_dynamic_props = self._parse_dynamic_properties(attr_xml)
                # An attribute without its own responsible parties inherits
                # the entity's.
                attribute_responsible_parties = (
                    tuple(self._parse_responsible_parties(attr_xml))
                    or entity_responsible_parties
                )

                referenced_attribute_id = attr_xml.findtext("./referedAttribute")
                if referenced_attribute_id:
                    # Resolved after all entities are parsed, because the
                    # referenced attribute may not have been seen yet.
                    dependencies.setdefault(entity_id, []).append(
                        referenced_attribute_id
                    )
                    continue

                attribute = Attribute(
                    natural_key=attr_xml.attrib["name"],
                    entity_id=entity_id,
                    name=attr_xml.attrib.get("name", ""),
                    data_type=ODMAttributeTypesMapping.get(
                        attr_xml.findtext("./logicalDatatype"),
                        "type missing in mapping",
                    ),
                    sensitivity_type=(
                        attr_xml.findtext("./sensitiveType") or "Not sensitive"
                    ),
                    description=(attr_xml.findtext("./comment") or ""),
                    documents=self._parse_documents(attr_xml),
                    pm_map=split_csv(attribute_dynamic_props.get("pm_map")),
                    domain=attribute_dynamic_props.get("domain", entity_domain),
                    master_source_systems=split_csv(
                        attribute_dynamic_props.get("master_source_systems")
                    ),
                    responsible_parties=attribute_responsible_parties,
                    created_by=(attr_xml.findtext("./createdBy") or "").strip()
                    or None,
                    created_time=self._parse_dt_utc(
                        (attr_xml.findtext("./createdTime") or "").strip()
                    ),
                )

                self.LM.register_attribute(attribute)
                entity_linked_attribute_ids.append(attribute.id)
                self.LM.all_lm_units_by_odm_id[attr_xml.attrib["id"]] = attribute

            entity_description = xml_root.findtext("comment")

            entity = Entity(
                natural_key=xml_root.attrib["name"],
                name=xml_root.attrib["name"],
                description=entity_description
                if entity_description is not None
                else "",
                identifiers=entity_identifiers,
                attributes=tuple(entity_linked_attribute_ids),
                responsible_parties=entity_responsible_parties,
                documents=self._parse_documents(xml_root),
                # Tuples, like the attribute-level values, instead of the
                # lists that a bare str.split() used to leave here.
                pm_map=split_csv(entity_dynamic_props.get("pm_map")),
                domain=entity_domain,
                master_source_systems=split_csv(
                    entity_dynamic_props.get("master_source_systems")
                ),
                created_by=None,
                created_time=None,
            )

            self.LM.register_entity(entity)
            self.LM.all_lm_units_by_odm_id[entity_id] = entity

    for seg in self.relations_path.iterdir():
        for relation_xml in seg.iterdir():
            xml_root = ET.parse(relation_xml).getroot()

            relation_dynamic_props = self._parse_dynamic_properties(xml_root)
            relation_responsible_parties = tuple(
                self._parse_responsible_parties(xml_root)
            )
            relation_description = xml_root.findtext("comment")

            relation = Relation(
                natural_key=xml_root.attrib["name"],
                name=xml_root.attrib["name"],
                domain=relation_dynamic_props.get("domain"),
                description=relation_description
                if relation_description is not None
                else "",
                pm_map=split_csv(relation_dynamic_props.get("pm_map")),
                master_source_systems=split_csv(
                    relation_dynamic_props.get("master_source_systems")
                ),
                responsible_parties=relation_responsible_parties,
                documents=self._parse_documents(xml_root),
                source_entity_id=self.LM.all_lm_units_by_odm_id[
                    xml_root.findtext("sourceEntity")
                ].id,
                target_entity_id=self.LM.all_lm_units_by_odm_id[
                    xml_root.findtext("targetEntity")
                ].id,
                optional_source=xml_root.findtext("optionalSource"),
                optional_target=xml_root.findtext("optionalTarget"),
                source_cardinality=xml_root.findtext("sourceCardinality"),
                # NOTE(review): the source side reads "sourceCardinality"
                # while the target side reads "targetCardinalityString" -
                # confirm the asymmetry is intended.
                target_cardinality=xml_root.findtext("targetCardinalityString"),
                created_by=None,
                created_time=None,
            )

            self.LM.register_relation(relation)
            self.LM.all_lm_units_by_odm_id[xml_root.attrib["id"]] = relation

    for dependent_entity_odm_id, referenced_attribute_ids in dependencies.items():
        dependent_entity = self.LM.all_lm_units_by_odm_id[dependent_entity_odm_id]
        for attribute_odm_id in referenced_attribute_ids:
            self.LM.register_dependency(
                dependent_entity,
                self.LM.all_units_by_id[
                    self.LM.all_lm_units_by_odm_id[attribute_odm_id].id
                ],
            )

    return self.LM
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
class ODMVersionedProjectParser:
    """
    Parse every Oracle Data Modeler project (``*.dmd``) found directly inside
    one directory.

    Each project is parsed with ``ODMParser``; its business information is
    registered in ``BIDatabase`` and its logical model in ``LMDatabse``.
    Parsing happens during construction.
    """

    def __init__(self, odm_project_path: Path):
        """
        Args:
            odm_project_path: Directory (path or path-like string) holding the
                ``.dmd`` files and their asset folders.

        Raises:
            ValueError: If ``odm_project_path`` is not an existing directory.
        """
        if not isinstance(odm_project_path, Path):
            odm_project_path = Path(odm_project_path)
        if not odm_project_path.is_dir():
            # The message now names the actual parameter.
            raise ValueError(
                f"odm_project_path must be a valid directory, got: {odm_project_path}"
            )

        self.odm_project_path = odm_project_path

        # glob() yields paths in arbitrary order; sorting keeps the order in
        # which models are parsed and registered stable between runs.
        self.odm_projects_paths = sorted(self.odm_project_path.glob("*.dmd"))

        # "LMDatabse" (sic) is kept because it is the existing public
        # attribute name; see the LMDatabase property for the alias.
        self.LMDatabse = LogicalModelsDatabase()
        self.BIDatabase = BusinessInformationDatabase()

        self.parse_project()

    @property
    def LMDatabase(self):
        """Correctly spelled read-only alias of ``LMDatabse``."""
        return self.LMDatabse

    def parse_project(self) -> None:
        """
        Parse every discovered project and register its results.

        For each ``.dmd`` file the business information is parsed first and
        the logical model second, matching the order ``ODMParser`` needs to
        resolve references from the logical model into business information.
        """
        for model_path in self.odm_projects_paths:
            parser = ODMParser(model_path)
            self.BIDatabase.register_business_information(parser.parse_bi())
            self.LMDatabse.register_logical_model(parser.parse_lm())

        return None

    def get_model(self, model_name: str) -> LogicalModel:
        """
        Return the logical model registered under ``model_name``.

        Raises:
            KeyError: If no such model was parsed.
        """
        if model_name not in self.LMDatabse.logical_models:
            raise KeyError(f"Logical Model '{model_name}' not found in ODM database")

        return self.LMDatabse.logical_models[model_name]

    def get_bi(self, bi_name: str) -> BusinessInformation:
        """
        Return the business information registered under ``bi_name``.

        Raises:
            KeyError: If no such business information was parsed.
        """
        if bi_name not in self.BIDatabase.business_information:
            raise KeyError(f"BusinessInformation '{bi_name}' not found in ODM database")

        return self.BIDatabase.business_information[bi_name]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: dg_kit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Data Governance Kit provides programmatic access to data governance metadata with integrations for different Logical and Physical Modeling tools.
|
|
5
|
+
Author: Chelidze Georgii
|
|
6
|
+
Author-email: Chelidze Georgii <chelidze.georgii.d@gmail.com>
|
|
7
|
+
License: MIT
|
|
8
|
+
Requires-Dist: pyyaml>=6.0.3 ; extra == 'dbt'
|
|
9
|
+
Requires-Dist: notion-client>=2.7.0 ; extra == 'notion'
|
|
10
|
+
Requires-Python: >=3.10
|
|
11
|
+
Provides-Extra: dbt
|
|
12
|
+
Provides-Extra: notion
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
|
|
15
|
+
## Data Governance Kit (dg_kit)
|
|
16
|
+
|
|
17
|
+
Data Governance Kit helps you access Data Governance information programmatically.
|
|
18
|
+
It provides core objects that model Physical Model, Logical Model, Business Information,
|
|
19
|
+
and related governance metadata. Integrations let you pull this data from tools like
|
|
20
|
+
dbt, Oracle Data Modeler, and Notion, with more connectors planned in upcoming releases.
|
|
21
|
+
|
|
22
|
+
This toolkit is handy for building Data Governance CI gates, strengthening Data Ops
|
|
23
|
+
practices, and keeping governance checks close to your delivery workflows.
|
|
24
|
+
|
|
25
|
+
## Requirements
|
|
26
|
+
- Python >= 3.10
|
|
27
|
+
|
|
28
|
+
## Install
|
|
29
|
+
```bash
|
|
30
|
+
pip install -e .
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Optional extras:
|
|
34
|
+
```bash
|
|
35
|
+
pip install -e ".[dbt]"
|
|
36
|
+
pip install -e ".[notion]"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
### Parse an Oracle Data Modeler project
|
|
42
|
+
```python
|
|
43
|
+
from dg_kit.integrations.odm.parser import ODMParser
|
|
44
|
+
|
|
45
|
+
parser = ODMParser("path/to/model.dmd")
|
|
46
|
+
bi = parser.parse_bi()
|
|
47
|
+
lm = parser.parse_lm()
|
|
48
|
+
|
|
49
|
+
print(lm.version, len(lm.entities))
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Parse a dbt project into a physical model
|
|
53
|
+
```python
|
|
54
|
+
from dg_kit.integrations.dbt.parser import DBTParser
|
|
55
|
+
|
|
56
|
+
pm = DBTParser("path/to/dbt_project").parse_pm()
|
|
57
|
+
print(pm.version, len(pm.tables))
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Validate with conventions
|
|
61
|
+
```python
|
|
62
|
+
from dg_kit.base.convention import Convention, ConventionValidator
|
|
63
|
+
from dg_kit.base.enums import ConventionRuleSeverity
|
|
64
|
+
|
|
65
|
+
convention = Convention("example")
|
|
66
|
+
|
|
67
|
+
@convention.rule(
|
|
68
|
+
name="has-entities",
|
|
69
|
+
severity=ConventionRuleSeverity.ERROR,
|
|
70
|
+
description="Logical model must contain at least one entity",
|
|
71
|
+
)
|
|
72
|
+
def has_entities(lm, pm):
|
|
73
|
+
return set() if lm.entities else {("no entities")}
|
|
74
|
+
|
|
75
|
+
issues = ConventionValidator(lm, pm, convention).validate()
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Sync to Notion data catalog
|
|
79
|
+
```python
|
|
80
|
+
from dg_kit.integrations.notion.api import NotionDataCatalog
|
|
81
|
+
|
|
82
|
+
catalog = NotionDataCatalog(
|
|
83
|
+
notion_token="secret",
|
|
84
|
+
dc_table_id="data_source_id",
|
|
85
|
+
)
|
|
86
|
+
rows = catalog.pull()
|
|
87
|
+
print(len(rows))
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Development
|
|
91
|
+
Run tests:
|
|
92
|
+
```bash
|
|
93
|
+
pytest
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Export requirements with uv:
|
|
97
|
+
```bash
|
|
98
|
+
uv export --extra dbt --extra notion --group test -o requirements.txt
|
|
99
|
+
```
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
dg_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
dg_kit/base/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
dg_kit/base/business_information.py,sha256=zFnL_YgD1OzNoBrQXSn1DXBDuN5I6OLuGhOTSogINdA,2064
|
|
4
|
+
dg_kit/base/convention.py,sha256=ja0b8MXg5QiIIaACyNEnRJnGcUwgr7R-aT6GSmQP_ao,1586
|
|
5
|
+
dg_kit/base/data_catalog.py,sha256=yHNowIt-gqB3pcSHt3jppe8pLoAK_--csrx_aglEMKM,87
|
|
6
|
+
dg_kit/base/dataclasses/__init__.py,sha256=yqsgrqBySk1qV1EU8GxMrCu4tXvPDSvZq91VAmabx8Y,187
|
|
7
|
+
dg_kit/base/dataclasses/business_information.py,sha256=kH_fBYTwzlp9-DHoSRY2Hpe9jhHRA_0SN3mUFJVja-4,1802
|
|
8
|
+
dg_kit/base/dataclasses/convention.py,sha256=tvy3ZoiCq6aoz3Jj4Bh29t-FP5JhwPRgIanqxCqSIlI,761
|
|
9
|
+
dg_kit/base/dataclasses/data_catalog.py,sha256=hA07PNebUOH29sBgmAAgUNX9riwLHm-EEo8v4QgruqE,1803
|
|
10
|
+
dg_kit/base/dataclasses/logical_model.py,sha256=B8diZNTMydR91olKSy94jVe35w6NwWl0iRPjogj8mRE,2450
|
|
11
|
+
dg_kit/base/dataclasses/physical_model.py,sha256=mRmpTHnng6DwYe0i5ELk4AxQX2Cre3AvPP-YN4yn7_4,892
|
|
12
|
+
dg_kit/base/enums.py,sha256=XEwu86kN7nZRJdAgWTOWfwUVp4bPpxnnkxvy3PkTtY4,232
|
|
13
|
+
dg_kit/base/logical_model.py,sha256=Fvpi1_QW3QRe35J26COvlxGYpNyXfR4E-pPwKJM-eAY,2779
|
|
14
|
+
dg_kit/base/physical_model.py,sha256=4S891pXYVSunSjxd5x4eQiOU1ZJA3oAMDh1zqtcvwEo,1444
|
|
15
|
+
dg_kit/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
dg_kit/integrations/dbt/README.md,sha256=WGGaqKuqAqXtCmSlYHUFWLNHeLpzd2caCxOfkCocBEM,858
|
|
17
|
+
dg_kit/integrations/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
dg_kit/integrations/dbt/parser.py,sha256=yrLxp5wb4G2D56jW6JnPj4o_Yu9smLEUkizDV-ERhSE,7151
|
|
19
|
+
dg_kit/integrations/notion/README.md,sha256=c1DZsZmf5C_X_DgIwVfNnK3D7kb9oWPUcvs0U6Q2QG4,1168
|
|
20
|
+
dg_kit/integrations/notion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
dg_kit/integrations/notion/api.py,sha256=0sYYi8MKQRtfSb8hAMx8cadST84UKAjbx1kv1o-qUXU,18948
|
|
22
|
+
dg_kit/integrations/notion/formater.py,sha256=zrENNQY5DrYATwOj_QjPq4t9WkU1A966vHFzQn4hC-U,1928
|
|
23
|
+
dg_kit/integrations/odm/README.md,sha256=KZhy6E0PlGQNuXiKRxQGC_4hpMzllaUK9wX00WrCMpw,1449
|
|
24
|
+
dg_kit/integrations/odm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
|
+
dg_kit/integrations/odm/attr_types.py,sha256=TY5x4ilkdUQIKdPhHqxeO3Q-NrhKAVxjKnrCMBRkcTA,161
|
|
26
|
+
dg_kit/integrations/odm/parser.py,sha256=yNdm1becJfMCKapGmxti63CrH5usjwBG_-hvYZAKwI4,18938
|
|
27
|
+
dg_kit-0.1.0.dist-info/WHEEL,sha256=iHtWm8nRfs0VRdCYVXocAWFW8ppjHL-uTJkAdZJKOBM,80
|
|
28
|
+
dg_kit-0.1.0.dist-info/METADATA,sha256=YbKLty8TZITI1_Lfxji8HhyH0qOAXiqeIMecm8UPGLc,2553
|
|
29
|
+
dg_kit-0.1.0.dist-info/RECORD,,
|