dg-kit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dg_kit/__init__.py ADDED
File without changes
File without changes
@@ -0,0 +1,60 @@
1
+ from __future__ import annotations
2
+ from typing import Dict
3
+
4
+
5
+ from dg_kit.base.dataclasses.business_information import (
6
+ Team,
7
+ Contact,
8
+ Document,
9
+ Email,
10
+ Url,
11
+ )
12
+
13
+
14
class BusinessInformation:
    """Versioned in-memory registry of business-information units.

    Each ``register_*`` method stores the unit in its type-specific mapping
    (keyed by the unit's ``id``) and in the two cross-type lookups
    ``all_units_by_id`` and ``all_units_by_natural_key``. Registering a unit
    under a key that is already present replaces the earlier entry.
    """

    def __init__(self, version: str):
        self.version = version

        # Per-type indexes, keyed by unit id.
        self.teams: Dict[str, Team] = {}
        self.contacts: Dict[str, Contact] = {}
        self.documents: Dict[str, Document] = {}
        self.emails: Dict[str, Email] = {}
        self.urls: Dict[str, Url] = {}

        # Cross-type indexes shared by every kind of unit.
        self.all_units_by_id: Dict[str, Team | Contact | Document | Email | Url] = {}
        self.all_units_by_natural_key: Dict[
            str, Team | Contact | Document | Email | Url
        ] = {}

    def _index(
        self,
        typed_index: Dict,
        unit: Team | Contact | Document | Email | Url,
    ) -> None:
        """Store *unit* in *typed_index* and in both cross-type lookups."""
        typed_index[unit.id] = unit
        self.all_units_by_id[unit.id] = unit
        self.all_units_by_natural_key[unit.natural_key] = unit

    def register_team(self, team: Team) -> None:
        """Add *team* to ``teams`` and to the cross-type lookups."""
        self._index(self.teams, team)

    def register_contact(self, contact: Contact) -> None:
        """Add *contact* to ``contacts`` and to the cross-type lookups."""
        self._index(self.contacts, contact)

    def register_document(self, document: Document) -> None:
        """Add *document* to ``documents`` and to the cross-type lookups."""
        self._index(self.documents, document)

    def register_email(self, email: Email) -> None:
        """Add *email* to ``emails`` and to the cross-type lookups."""
        self._index(self.emails, email)

    def register_url(self, url: Url) -> None:
        """Add *url* to ``urls`` and to the cross-type lookups."""
        self._index(self.urls, url)
51
+
52
+
53
class BusinessInformationDatabase:
    """Collection of ``BusinessInformation`` snapshots keyed by their version."""

    def __init__(self):
        self.business_information: Dict[str, BusinessInformation] = {}

    def register_business_information(
        self, business_information: BusinessInformation
    ) -> None:
        """Store the snapshot under its ``version``, replacing any earlier one."""
        version_key = business_information.version
        self.business_information[version_key] = business_information
@@ -0,0 +1,57 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List
4
+
5
+ from dg_kit.base.physical_model import PhysicalModel
6
+ from dg_kit.base.logical_model import LogicalModel
7
+ from dg_kit.base.enums import ConventionRuleSeverity
8
+ from dg_kit.base.dataclasses.convention import (
9
+ ConventionRule,
10
+ ConventionRuleFn,
11
+ ConventionBreach,
12
+ )
13
+
14
+
15
class Convention:
    """Named collection of convention rules, populated through a decorator."""

    def __init__(self, name: str):
        self.name = name
        self._rules: List[ConventionRule] = []

    def rule(
        self,
        name: str,
        severity: ConventionRuleSeverity,
        description: str,
    ):
        """Return a decorator that records the decorated function as a rule.

        The function is wrapped in a ``ConventionRule`` together with *name*,
        *severity* and *description*, appended to this convention, and then
        handed back unchanged so it stays directly callable.
        """

        def register(fn: ConventionRuleFn) -> ConventionRuleFn:
            new_rule = ConventionRule(name, severity, description, fn)
            self._rules.append(new_rule)
            return fn

        return register

    @property
    def rules(self) -> List[ConventionRule]:
        """Registered rules in registration order, as a fresh list."""
        # A copy is returned so callers cannot mutate the internal list.
        return self._rules.copy()
35
+
36
+
37
class ConventionValidator:
    """Runs every rule of a convention against a logical/physical model pair."""

    def __init__(self, lm: LogicalModel, pm: PhysicalModel, convention: Convention):
        self.lm = lm
        self.pm = pm
        self.convention = convention

    def validate(self) -> List[ConventionBreach]:
        """Evaluate all rules and return the breaches they report.

        Each breach yielded by a rule function is re-created so that its
        severity is the severity of the rule that produced it; message,
        unit id and unit natural key are carried over as reported.
        """
        breaches: List[ConventionBreach] = []

        for rule in self.convention.rules:
            reported = rule.fn(self.lm, self.pm)
            breaches.extend(
                ConventionBreach(
                    severity=rule.severity,
                    message=raw.message,
                    unit_id=raw.unit_id,
                    unit_natural_key=raw.unit_natural_key,
                )
                for raw in reported
            )

        return breaches
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+
4
+ from abc import ABC
5
+
6
+
7
class DataCatalog(ABC):
    """Base class for data-catalog integrations; declares no members yet."""
@@ -0,0 +1,7 @@
1
+ import hashlib
2
+
3
+
4
def id_generator(*parts: str, size: int = 16) -> str:
    """Return a deterministic hexadecimal id derived from *parts*.

    The parts are joined with the ASCII unit separator (``\\x1f``), encoded as
    UTF-8 and hashed with BLAKE2b.

    Args:
        *parts: Strings that together identify the object.
        size: Digest size in bytes; the returned string has ``2 * size``
            hexadecimal characters.

    Returns:
        The hex digest of the joined parts.
    """
    hasher = hashlib.blake2b(digest_size=size)
    # The separator keeps ("a", "b") distinct from ("ab",).
    unit_separator = "\x1f"
    payload = unit_separator.join(parts).encode("utf-8")
    hasher.update(payload)
    return hasher.hexdigest()
@@ -0,0 +1,77 @@
1
+ """
2
+ Docstring for dg_kit.base.dataclasses.business_information
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from dataclasses import dataclass, field
8
+ from typing import Optional, Tuple
9
+
10
+ from dg_kit.base.dataclasses import id_generator
11
+
12
+
13
@dataclass(frozen=True, slots=True)
class SlackChannelUrl:
    """Immutable Slack-channel link: natural key, name and optional URL.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    url: Optional[str]

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
22
+
23
+
24
@dataclass(frozen=True, slots=True)
class Email:
    """Immutable e-mail record: natural key, name and optional address.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    email_address: Optional[str]

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
33
+
34
+
35
@dataclass(frozen=True, slots=True)
class Url:
    """Immutable link record: natural key, name and optional URL.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    url: Optional[str]

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
44
+
45
+
46
@dataclass(frozen=True, slots=True)
class Contact:
    """Immutable contact record with its associated e-mails and URLs.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    emails: Tuple[Email, ...]
    urls: Tuple[Url, ...]

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
56
+
57
+
58
@dataclass(frozen=True, slots=True)
class Team:
    """Immutable team record holding a tuple of its contacts.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    contacts: Tuple[Contact, ...]

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
67
+
68
+
69
@dataclass(frozen=True, slots=True)
class Document:
    """Immutable document record: natural key, name and optional reference.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    reference: Optional[str]

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
@@ -0,0 +1,30 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional, Protocol, Set
5
+
6
+ from dg_kit.base.enums import ConventionRuleSeverity
7
+ from dg_kit.base.logical_model import LogicalModel
8
+ from dg_kit.base.physical_model import PhysicalModel
9
+
10
+
11
+ @dataclass(frozen=True, slots=True)
12
+ class ConventionBreach:
13
+ severity: ConventionRuleSeverity
14
+ message: str
15
+ unit_id: Optional[str] = None
16
+ unit_natural_key: Optional[str] = None
17
+
18
+
19
class ConventionRuleFn(Protocol):
    """Structural type of a rule function.

    A rule is any callable taking the logical and the physical model and
    returning the set of breaches it found.
    """

    def __call__(
        self, lm: LogicalModel, pm: PhysicalModel
    ) -> Set[ConventionBreach]: ...
23
+
24
+
25
+ @dataclass(frozen=True, slots=True)
26
+ class ConventionRule:
27
+ name: str
28
+ severity: ConventionRuleSeverity
29
+ description: str
30
+ fn: ConventionRuleFn
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Tuple
4
+
5
+ from datetime import datetime
6
+
7
+ from dataclasses import dataclass
8
+
9
+ from dg_kit.base.enums import DataUnitType
10
+ from dg_kit.base.dataclasses.business_information import Document, Team
11
+
12
+
13
+ @dataclass(frozen=True, slots=True)
14
+ class DataCatalogRow:
15
+ data_unit_name: str
16
+ data_unit_type: DataUnitType
17
+ domain: str
18
+ data_unit_uuid: str
19
+ last_edited_time: Optional[datetime] = None
20
+ created_time: Optional[datetime] = None
21
+
22
+ def __post_init__(self) -> None: ...
23
+
24
+
25
+ @dataclass(frozen=True, slots=True)
26
+ class EntityTypeDataUnitPageInfo:
27
+ data_unit_type: DataUnitType
28
+ description: str
29
+ linked_documents: Tuple[Document, ...]
30
+ responsible_parties: Tuple[Team, ...]
31
+ master_source_systems: Tuple[str, ...]
32
+ core_layer_mapping: Tuple[str, ...]
33
+ pk_attributes_page_ids: Tuple[str, ...]
34
+ attributes_page_ids: Tuple[str, ...]
35
+ relationes_page_ids: Tuple[str, ...]
36
+
37
+
38
+ @dataclass(frozen=True, slots=True)
39
+ class AttributeTypeDataUnitPageInfo:
40
+ parent_entity_page_id: str
41
+ data_unit_type: DataUnitType
42
+ description: str
43
+ data_type: str
44
+ sensitivity_type: str
45
+ linked_documents: Tuple[Document, ...]
46
+ responsible_parties: Tuple[Team, ...]
47
+ core_layer_mapping: Tuple[str, ...]
48
+ master_source_systems: Tuple[str, ...]
49
+
50
+
51
+ @dataclass(frozen=True, slots=True)
52
+ class RelationTypeDataUnitPageInfo:
53
+ data_unit_type: DataUnitType
54
+ description: str
55
+ linked_documents: Tuple[Document, ...]
56
+ responsible_parties: Tuple[Team, ...]
57
+ core_layer_mapping: Tuple[str, ...]
58
+ master_source_systems: Tuple[str, ...]
59
+ source_entity_page_id: str
60
+ target_entity_page_id: str
61
+ optional_source: bool
62
+ optional_target: bool
63
+ source_cardinality: str
64
+ target_cardinality: str
@@ -0,0 +1,86 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from datetime import datetime
5
+ from typing import Optional, Tuple
6
+
7
+ from dg_kit.base.dataclasses import id_generator
8
+ from dg_kit.base.dataclasses.business_information import Team, Document
9
+
10
+
11
@dataclass(frozen=True, slots=True)
class EntityIdentifier:
    """Immutable identifier of an entity, listing the attribute ids it uses.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation.
    """

    id: str = field(init=False)
    natural_key: str
    name: Optional[str]
    is_pk: bool
    entity_id: str
    used_attributes_ids: Tuple[str, ...]

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
22
+
23
+
24
@dataclass(frozen=True, slots=True)
class Relation:
    """Immutable logical-model relation between a source and a target entity.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation. ``created_by`` and ``created_time``
    are optional and default to ``None``.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    domain: Optional[str]
    description: str
    # presumably natural keys of mapped physical-model objects; verify against callers
    pm_map: Tuple[str, ...]
    master_source_systems: Tuple[str, ...]
    responsible_parties: Tuple[Team, ...]
    documents: Tuple[Document, ...]
    # The two ends of the relation.
    source_entity_id: str
    target_entity_id: str
    optional_source: Optional[bool]
    optional_target: Optional[bool]
    source_cardinality: Optional[str]
    target_cardinality: Optional[str]
    created_by: Optional[str] = None
    created_time: Optional[datetime] = None

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
46
+
47
+
48
@dataclass(frozen=True, slots=True)
class Attribute:
    """Immutable logical-model attribute belonging to the entity ``entity_id``.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation. ``created_by`` and ``created_time``
    are optional and default to ``None``.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    domain: Optional[str]
    description: str
    sensitivity_type: str
    data_type: str
    # presumably natural keys of mapped physical-model objects; verify against callers
    pm_map: Tuple[str, ...]
    master_source_systems: Tuple[str, ...]
    responsible_parties: Tuple[Team, ...]
    documents: Tuple[Document, ...]
    entity_id: str
    created_by: Optional[str] = None
    created_time: Optional[datetime] = None

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
67
+
68
+
69
@dataclass(frozen=True, slots=True)
class Entity:
    """Immutable logical-model entity with its identifiers and attributes.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation. ``created_by`` and ``created_time``
    are optional and default to ``None``.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    domain: Optional[str]
    description: str
    identifiers: Tuple[EntityIdentifier, ...]
    # Attributes are held as strings, not ``Attribute`` objects.
    attributes: Tuple[str, ...]
    # presumably natural keys of mapped physical-model objects; verify against callers
    pm_map: Tuple[str, ...]
    master_source_systems: Tuple[str, ...]
    responsible_parties: Tuple[Team, ...]
    documents: Tuple[Document, ...]
    created_by: Optional[str] = None
    created_time: Optional[datetime] = None

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
@@ -0,0 +1,38 @@
1
+ from dataclasses import dataclass, field
2
+ from dg_kit.base.dataclasses import id_generator
3
+
4
+
5
@dataclass(frozen=True, slots=True)
class Layer:
    """Immutable physical-model layer, flagged as landing or not.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation.
    """

    id: str = field(init=False)
    natural_key: str
    name: str
    is_landing: bool

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
14
+
15
+
16
@dataclass(frozen=True, slots=True)
class Table:
    """Immutable physical-model table that belongs to the layer ``layer_id``.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation.
    """

    id: str = field(init=False)
    natural_key: str
    layer_id: str
    name: str

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
25
+
26
+
27
@dataclass(frozen=True, slots=True)
class Column:
    """Immutable physical-model column located by ``layer_id`` and ``table_id``.

    ``id`` is not accepted by the constructor; it is derived from
    ``natural_key`` after initialisation. ``description`` defaults to an
    empty string.
    """

    id: str = field(init=False)
    natural_key: str
    layer_id: str
    table_id: str
    name: str
    data_type: str
    description: str = ""

    def __post_init__(self) -> None:
        # Frozen dataclasses reject normal assignment, so bypass it to fill ``id``.
        derived_id = id_generator(self.natural_key)
        object.__setattr__(self, "id", derived_id)
dg_kit/base/enums.py ADDED
@@ -0,0 +1,13 @@
1
+ from enum import StrEnum
2
+
3
+
4
+ class ConventionRuleSeverity(StrEnum):
5
+ INFO = "info"
6
+ WARN = "warning"
7
+ ERROR = "error"
8
+
9
+
10
+ class DataUnitType(StrEnum):
11
+ ENTITY = "entity"
12
+ ATTRIBUTE = "attribute"
13
+ RELATION = "relation"
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+ from typing import Dict, List
3
+
4
+
5
+ from dg_kit.base.dataclasses.logical_model import Entity, Attribute, Relation
6
+
7
+
8
class LogicalModel:
    """Versioned in-memory registry of logical-model units.

    Entities, attributes and relations are indexed per type by ``id`` and in
    the cross-type lookups ``all_units_by_id`` and
    ``all_units_by_natural_key``. Every key listed in a unit's ``pm_map`` is
    collected in ``pm_objects_nks_used``.
    """

    def __init__(self, version: str):
        self.version = version
        self.entities: Dict[str, Entity] = {}
        self.attributes: Dict[str, Attribute] = {}
        # Maps a dependent unit id to the ids it depends on.
        self.dependencies: dict[str, set[str]] = {}
        self.relations: Dict[str, Relation] = {}
        # Despite the name, the values are Relation objects, not ids.
        self.relations_ids_by_entity_id: Dict[str, List[Relation]] = {}
        self.all_units_by_id: Dict[str, Entity | Attribute | Relation] = {}
        self.all_units_by_natural_key: Dict[str, Entity | Attribute | Relation] = {}
        self.pm_objects_nks_used: set[str] = set()
        # Not populated by this class; callers fill it in.
        self.all_lm_units_by_odm_id: Dict[str, Entity | Attribute | Relation] = {}

    def register_entity(self, entity: Entity) -> None:
        """Index *entity* and record the keys in its ``pm_map``."""
        self.entities[entity.id] = entity
        self.all_units_by_id[entity.id] = entity
        self.all_units_by_natural_key[entity.natural_key] = entity
        self.pm_objects_nks_used.update(entity.pm_map)

    def register_attribute(self, attribute: Attribute) -> None:
        """Index *attribute* and record the keys in its ``pm_map``."""
        self.attributes[attribute.id] = attribute
        self.all_units_by_id[attribute.id] = attribute
        self.all_units_by_natural_key[attribute.natural_key] = attribute
        self.pm_objects_nks_used.update(attribute.pm_map)

    def register_relation(self, relation: Relation) -> None:
        """Index *relation* and attach it to both of its endpoint entities.

        A self-referential relation (source and target are the same entity)
        is attached to that entity once, not twice.
        """
        self.relations[relation.id] = relation
        self.all_units_by_id[relation.id] = relation
        self.all_units_by_natural_key[relation.natural_key] = relation

        # dict.fromkeys de-duplicates the endpoints while keeping source first.
        endpoints = dict.fromkeys(
            (relation.source_entity_id, relation.target_entity_id)
        )
        for entity_id in endpoints:
            self.relations_ids_by_entity_id.setdefault(entity_id, []).append(relation)

        self.pm_objects_nks_used.update(relation.pm_map)

    def register_dependency(self, dependent: Entity, dependency: Attribute) -> None:
        """Record that *dependent* depends on *dependency* (by id)."""
        self.dependencies.setdefault(dependent.id, set()).add(dependency.id)
59
+
60
+
61
class LogicalModelsDatabase:
    """Collection of ``LogicalModel`` snapshots keyed by their version."""

    def __init__(self):
        self.logical_models: Dict[str, LogicalModel] = {}

    def register_logical_model(self, logical_model: LogicalModel) -> None:
        """Store the model under its ``version``, replacing any earlier one."""
        version_key = logical_model.version
        self.logical_models[version_key] = logical_model
@@ -0,0 +1,41 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict
4
+
5
+ from dg_kit.base.dataclasses.physical_model import (
6
+ Table,
7
+ Column,
8
+ Layer,
9
+ )
10
+
11
+
12
class PhysicalModel:
    """Versioned in-memory registry of physical-model units.

    Layers, tables and columns are indexed per type by ``id`` and in the
    cross-type lookups ``all_units_by_id`` and ``all_units_by_natural_key``.
    Registering a unit under a key that is already present replaces the
    earlier entry.
    """

    def __init__(self, version: str):
        self.version = version
        self.layers: Dict[str, Layer] = {}
        self.tables: Dict[str, Table] = {}
        self.columns: Dict[str, Column] = {}
        # Maps a dependent table id to the ids of the tables it depends on.
        self.dependencies: dict[str, set[str]] = {}
        self.all_units_by_id: Dict[str, Layer | Table | Column] = {}
        self.all_units_by_natural_key: Dict[str, Layer | Table | Column] = {}

    def register_layer(self, layer: Layer) -> None:
        """Index *layer* by id and by natural key."""
        self.layers[layer.id] = layer
        self.all_units_by_id[layer.id] = layer
        self.all_units_by_natural_key[layer.natural_key] = layer

    def register_table(self, table: Table) -> None:
        """Index *table* by id and by natural key."""
        self.tables[table.id] = table
        self.all_units_by_id[table.id] = table
        self.all_units_by_natural_key[table.natural_key] = table

    def register_column(self, column: Column) -> None:
        """Index *column* by id and by natural key."""
        self.columns[column.id] = column
        self.all_units_by_id[column.id] = column
        self.all_units_by_natural_key[column.natural_key] = column

    def register_dependency(self, dependent: Table, dependency: Table) -> None:
        """Record that *dependent* depends on *dependency* (by id)."""
        self.dependencies.setdefault(dependent.id, set()).add(dependency.id)
File without changes
@@ -0,0 +1,27 @@
1
+ ## dbt Integration
2
+
3
+ Parses a dbt project into a `PhysicalModel` with layers, tables, columns, and dependencies.
4
+
5
+ This integration is useful for extracting governance metadata from dbt and feeding it into
6
+ CI checks, documentation, or data catalog syncs.
7
+
8
+ ## Requirements
9
+ - A dbt project directory with `dbt_project.yml`
10
+ - A `models/` directory containing model `.yml` and `.sql` files
11
+
12
+ ## Usage
13
+ ```python
14
+ from dg_kit.integrations.dbt.parser import DBTParser
15
+
16
+ pm = DBTParser("path/to/dbt_project").parse_pm()
17
+
18
+ print(pm.version, len(pm.layers), len(pm.tables))
19
+ ```
20
+
21
+ ## What It Parses
22
+ - Sources defined in top-level `models/*.yml`
23
+ - Models and columns from the per-layer `models/<layer>/*.yml` files
24
+ - Dependencies from `ref()` and `source()` calls in `models/**/*.sql`
25
+
26
+ ## Notes
27
+ - Dependencies are registered only when referenced models/sources exist in the parsed metadata.
File without changes