dg-kit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dg_kit/__init__.py +0 -0
- dg_kit/base/__init__.py +0 -0
- dg_kit/base/business_information.py +60 -0
- dg_kit/base/convention.py +57 -0
- dg_kit/base/data_catalog.py +7 -0
- dg_kit/base/dataclasses/__init__.py +7 -0
- dg_kit/base/dataclasses/business_information.py +77 -0
- dg_kit/base/dataclasses/convention.py +30 -0
- dg_kit/base/dataclasses/data_catalog.py +64 -0
- dg_kit/base/dataclasses/logical_model.py +86 -0
- dg_kit/base/dataclasses/physical_model.py +38 -0
- dg_kit/base/enums.py +13 -0
- dg_kit/base/logical_model.py +66 -0
- dg_kit/base/physical_model.py +41 -0
- dg_kit/integrations/__init__.py +0 -0
- dg_kit/integrations/dbt/README.md +27 -0
- dg_kit/integrations/dbt/__init__.py +0 -0
- dg_kit/integrations/dbt/parser.py +202 -0
- dg_kit/integrations/notion/README.md +38 -0
- dg_kit/integrations/notion/__init__.py +0 -0
- dg_kit/integrations/notion/api.py +495 -0
- dg_kit/integrations/notion/formater.py +65 -0
- dg_kit/integrations/odm/README.md +46 -0
- dg_kit/integrations/odm/__init__.py +0 -0
- dg_kit/integrations/odm/attr_types.py +6 -0
- dg_kit/integrations/odm/parser.py +490 -0
- dg_kit-0.1.0.dist-info/METADATA +99 -0
- dg_kit-0.1.0.dist-info/RECORD +29 -0
- dg_kit-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, Dict
|
|
3
|
+
|
|
4
|
+
from notion_client import Client
|
|
5
|
+
|
|
6
|
+
from dg_kit.base.data_catalog import DataCatalog
|
|
7
|
+
from dg_kit.base.dataclasses.data_catalog import (
|
|
8
|
+
DataCatalogRow,
|
|
9
|
+
EntityTypeDataUnitPageInfo,
|
|
10
|
+
AttributeTypeDataUnitPageInfo,
|
|
11
|
+
RelationTypeDataUnitPageInfo,
|
|
12
|
+
)
|
|
13
|
+
from dg_kit.integrations.notion.formater import NotionFormater
|
|
14
|
+
|
|
15
|
+
from dg_kit.base.enums import DataUnitType
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class NotionDataCatalog(DataCatalog):
|
|
19
|
+
def __init__(
    self,
    notion_token: str,
    dc_table_id: str,
    prop_title: str = "Data unit",
    prop_type: str = "Data unit type",
    prop_data_unit_uuid: str = "Data unit uuid",
    prop_domain: str = "Domain",
):
    """Create a Notion client and load the catalog table into memory.

    Args:
        notion_token: Token handed to ``notion_client.Client`` as ``auth``.
        dc_table_id: Identifier used as ``data_source_id`` when querying
            and when creating pages.
        prop_title: Name of the title property holding the data unit name.
        prop_type: Name of the select property holding the data unit type.
        prop_data_unit_uuid: Name of the rich-text property holding the
            data unit uuid.
        prop_domain: Name of the property holding the domain.

    Note:
        The constructor finishes by calling ``pull()``, so building an
        instance already performs requests against Notion.
    """
    # Property names used when reading from and writing to the table.
    self.prop_title = prop_title
    self.prop_type = prop_type
    self.prop_data_unit_uuid = prop_data_unit_uuid
    self.prop_domain = prop_domain

    self.dc_table_id = dc_table_id
    self.notion = Client(auth=notion_token)

    # In-memory copy of the table plus the lookup indexes built on top of it.
    self.rows: list[DataCatalogRow] = []
    self.rows_by_id: Dict[str, DataCatalogRow] = {}
    self.rows_by_name: Dict[str, DataCatalogRow] = {}
    self.rows_by_page_id: Dict[str, DataCatalogRow] = {}
    self.page_id_by_uuid: Dict[str, str] = {}

    self.pull()
|
|
40
|
+
|
|
41
|
+
def _properties_from_row(self, row: DataCatalogRow) -> dict:
|
|
42
|
+
props = {
|
|
43
|
+
self.prop_title: {"title": [{"text": {"content": row.data_unit_name}}]},
|
|
44
|
+
self.prop_type: {"select": {"name": row.data_unit_type.value}},
|
|
45
|
+
self.prop_domain: {"select": {"name": row.domain}},
|
|
46
|
+
self.prop_data_unit_uuid: {
|
|
47
|
+
"rich_text": [{"type": "text", "text": {"content": row.data_unit_uuid}}]
|
|
48
|
+
},
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
return props
|
|
52
|
+
|
|
53
|
+
def _list_child_block_ids(self, page_id: str) -> list[str]:
|
|
54
|
+
ids: list[str] = []
|
|
55
|
+
cursor: Optional[str] = None
|
|
56
|
+
|
|
57
|
+
while True:
|
|
58
|
+
resp = self.notion.blocks.children.list(
|
|
59
|
+
block_id=page_id, page_size=100, start_cursor=cursor
|
|
60
|
+
)
|
|
61
|
+
for b in resp.get("results", []):
|
|
62
|
+
bid = b.get("id")
|
|
63
|
+
if bid:
|
|
64
|
+
ids.append(bid)
|
|
65
|
+
|
|
66
|
+
if not resp.get("has_more"):
|
|
67
|
+
break
|
|
68
|
+
cursor = resp.get("next_cursor")
|
|
69
|
+
if not cursor:
|
|
70
|
+
break
|
|
71
|
+
|
|
72
|
+
return ids
|
|
73
|
+
|
|
74
|
+
def _overwrite_page_body(self, page_id: str, new_blocks: list[dict]) -> None:
|
|
75
|
+
# 1) delete all existing top-level blocks
|
|
76
|
+
for bid in self._list_child_block_ids(page_id):
|
|
77
|
+
self.notion.blocks.delete(
|
|
78
|
+
block_id=bid
|
|
79
|
+
) # archives block :contentReference[oaicite:2]{index=2}
|
|
80
|
+
|
|
81
|
+
# 2) append new blocks (<=100 per request) :contentReference[oaicite:3]{index=3}
|
|
82
|
+
for i in range(0, len(new_blocks), 100):
|
|
83
|
+
self.notion.blocks.children.append(
|
|
84
|
+
block_id=page_id, children=new_blocks[i : i + 100]
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
def _build_entity_page_blocks(
    self, data_unit_details: EntityTypeDataUnitPageInfo
) -> list[dict]:
    """Build the Notion page body for an entity data unit.

    The body is a fixed sequence of sections, each a level-2 heading
    followed by its content: description, primary key attributes,
    attributes, relations, linked docs, responsible parties, core layer
    map and master source systems. A section with nothing to show gets a
    single "—" paragraph instead.

    Args:
        data_unit_details: Entity page info to render.

    Returns:
        Block payloads in display order.
    """
    fmt = NotionFormater
    body: list[dict] = []

    def add_section(heading, source, render, ordered=False):
        # Heading, then one rendered block per item (optionally sorted),
        # or the placeholder paragraph when the source is empty/None.
        body.append(fmt._h2(heading))
        if not source:
            body.append(fmt._para("—"))
            return
        for item in sorted(source) if ordered else source:
            body.append(render(item))

    def page_mention(page_id):
        return fmt._para_rich_text([fmt._rt_page_mention(page_id)])

    def text_bullet(text):
        return fmt._bullet([fmt._rt_text(text)])

    def document_bullet(document):
        return fmt._bullet([fmt._rt_text(document.name, url=document.reference)])

    def party_bullet(party):
        return text_bullet(party.name)

    # Description is a single paragraph rather than a list of items.
    body.append(fmt._h2("Description:"))
    body.append(fmt._para(data_unit_details.description or "—"))

    add_section(
        "Primary Key attributes:",
        data_unit_details.pk_attributes_page_ids,
        page_mention,
    )
    add_section("Attributes:", data_unit_details.attributes_page_ids, page_mention)
    add_section("Relations:", data_unit_details.relationes_page_ids, page_mention)
    add_section("Linked docs:", data_unit_details.linked_documents, document_bullet)
    add_section(
        "Responsible parties:", data_unit_details.responsible_parties, party_bullet
    )
    add_section(
        "Core layer map:",
        data_unit_details.core_layer_mapping,
        text_bullet,
        ordered=True,
    )
    add_section(
        "Master source systems:",
        data_unit_details.master_source_systems,
        text_bullet,
        ordered=True,
    )

    return body
|
|
181
|
+
|
|
182
|
+
def _build_attribute_page_blocks(
    self, data_unit_details: AttributeTypeDataUnitPageInfo
) -> list[dict]:
    """Build the Notion page body for an attribute data unit.

    The body is a fixed sequence of sections, each a level-2 heading
    followed by its content: description, parent entity, data type,
    sensitivity type, linked docs, responsible parties, core layer map
    and master source systems. A section with nothing to show gets a
    single "—" paragraph instead.

    Args:
        data_unit_details: Attribute page info to render.

    Returns:
        Block payloads in display order.
    """
    fmt = NotionFormater
    body: list[dict] = []

    def add_single(heading, value, render):
        # Heading followed by exactly one block: the rendered value, or
        # the placeholder paragraph when the value is empty/None.
        body.append(fmt._h2(heading))
        body.append(render(value) if value else fmt._para("—"))

    def add_list(heading, source, render, ordered=False):
        # Heading, then one rendered block per item (optionally sorted),
        # or the placeholder paragraph when the source is empty/None.
        body.append(fmt._h2(heading))
        if not source:
            body.append(fmt._para("—"))
            return
        for item in sorted(source) if ordered else source:
            body.append(render(item))

    def page_mention(page_id):
        return fmt._para_rich_text([fmt._rt_page_mention(page_id)])

    def text_bullet(text):
        return fmt._bullet([fmt._rt_text(text)])

    def document_bullet(document):
        return fmt._bullet([fmt._rt_text(document.name, url=document.reference)])

    def party_bullet(party):
        return text_bullet(party.name)

    add_single("Description:", data_unit_details.description, fmt._para)
    add_single(
        "Parent entity:", data_unit_details.parent_entity_page_id, page_mention
    )
    add_single("Data type:", data_unit_details.data_type, fmt._para)
    # Heading spelling corrected (was "Sensetivity type:").
    add_single("Sensitivity type:", data_unit_details.sensitivity_type, fmt._para)

    add_list("Linked docs:", data_unit_details.linked_documents, document_bullet)
    add_list(
        "Responsible parties:", data_unit_details.responsible_parties, party_bullet
    )
    add_list(
        "Core layer map:",
        data_unit_details.core_layer_mapping,
        text_bullet,
        ordered=True,
    )
    add_list(
        "Master source systems:",
        data_unit_details.master_source_systems,
        text_bullet,
        ordered=True,
    )

    return body
|
|
266
|
+
|
|
267
|
+
def _build_relation_page_blocks(
    self, data_unit_details: RelationTypeDataUnitPageInfo
) -> list[dict]:
    """Build the Notion page body for a relation data unit.

    The body is a fixed sequence of sections, each a level-2 heading
    followed by its content: description, source entity, target entity,
    linked docs, responsible parties, core layer map and master source
    systems. A section with nothing to show gets a single "—" paragraph
    instead.

    Args:
        data_unit_details: Relation page info to render.

    Returns:
        Block payloads in display order.
    """
    fmt = NotionFormater
    body: list[dict] = []

    def add_single(heading, value, render):
        # Heading followed by exactly one block: the rendered value, or
        # the placeholder paragraph when the value is empty/None.
        body.append(fmt._h2(heading))
        body.append(render(value) if value else fmt._para("—"))

    def add_list(heading, source, render, ordered=False):
        # Heading, then one rendered block per item (optionally sorted),
        # or the placeholder paragraph when the source is empty/None.
        body.append(fmt._h2(heading))
        if not source:
            body.append(fmt._para("—"))
            return
        for item in sorted(source) if ordered else source:
            body.append(render(item))

    def page_mention(page_id):
        return fmt._para_rich_text([fmt._rt_page_mention(page_id)])

    def text_bullet(text):
        return fmt._bullet([fmt._rt_text(text)])

    def document_bullet(document):
        return fmt._bullet([fmt._rt_text(document.name, url=document.reference)])

    def party_bullet(party):
        return text_bullet(party.name)

    add_single("Description:", data_unit_details.description, fmt._para)
    add_single(
        "Source entity:", data_unit_details.source_entity_page_id, page_mention
    )
    add_single(
        "Target entity:", data_unit_details.target_entity_page_id, page_mention
    )

    add_list("Linked docs:", data_unit_details.linked_documents, document_bullet)
    add_list(
        "Responsible parties:", data_unit_details.responsible_parties, party_bullet
    )
    add_list(
        "Core layer map:",
        data_unit_details.core_layer_mapping,
        text_bullet,
        ordered=True,
    )
    add_list(
        "Master source systems:",
        data_unit_details.master_source_systems,
        text_bullet,
        ordered=True,
    )

    return body
|
|
352
|
+
|
|
353
|
+
def pull(self, limit: Optional[int] = None) -> list[DataCatalogRow]:
    """Reload the in-memory catalog from the Notion data source.

    Queries ``self.dc_table_id`` page by page (at most 100 rows per
    request), converts every result into a ``DataCatalogRow`` and rebuilds
    the lookup indexes. The caches are replaced, not appended to, so
    calling ``pull()`` repeatedly neither duplicates rows nor keeps pages
    that the query no longer returns.

    Args:
        limit: Maximum number of rows to fetch; ``None`` fetches all rows.
            A limit of zero or less fetches nothing.

    Returns:
        ``self.rows`` after the refresh.

    Raises:
        KeyError, TypeError: If a page has no usable value in the type
            property (``props[prop_type]["select"]["name"]``).
    """
    rows: list[DataCatalogRow] = []
    rows_by_id: Dict[str, DataCatalogRow] = {}
    rows_by_name: Dict[str, DataCatalogRow] = {}
    rows_by_page_id: Dict[str, DataCatalogRow] = {}
    page_id_by_uuid: Dict[str, str] = {}

    start_cursor: Optional[str] = None

    while True:
        if limit is None:
            page_size = 100
        else:
            # Stop as soon as the requested number of rows is reached;
            # never ask for more than is still missing.
            remaining = limit - len(rows)
            if remaining <= 0:
                break
            page_size = min(100, remaining)

        payload: dict = {"data_source_id": self.dc_table_id, "page_size": page_size}
        if start_cursor:
            payload["start_cursor"] = start_cursor

        resp = self.notion.data_sources.query(**payload)

        for page in resp["results"]:
            props = page["properties"]

            data_unit_uuid = NotionFormater._safe_rich_text(
                props.get(self.prop_data_unit_uuid, {})
            )
            title = NotionFormater._safe_title(props.get(self.prop_title, {}))

            # _properties_from_row writes the domain as a select, so read
            # the select name first; fall back to rich text for tables
            # that store the domain as plain text.
            domain_prop = props.get(self.prop_domain, {})
            domain_select = domain_prop.get("select")
            if domain_select:
                domain = domain_select.get("name") or ""
            else:
                domain = NotionFormater._safe_rich_text(domain_prop)

            typ = props[self.prop_type]["select"]["name"]

            row = DataCatalogRow(
                data_unit_uuid=data_unit_uuid,
                data_unit_name=title,
                data_unit_type=DataUnitType(typ),
                domain=domain,
                last_edited_time=page.get("last_edited_time"),
                created_time=page.get("created_time"),
            )

            rows.append(row)
            rows_by_id[data_unit_uuid] = row
            rows_by_name[title] = row
            rows_by_page_id[page["id"]] = row
            page_id_by_uuid[data_unit_uuid] = page["id"]

        if not resp.get("has_more"):
            break
        start_cursor = resp.get("next_cursor")
        if not start_cursor:
            break

    # Swap the fresh data in only after every request succeeded, and do it
    # in place so references to the existing containers stay valid.
    self.rows[:] = rows
    for cache, fresh in (
        (self.rows_by_id, rows_by_id),
        (self.rows_by_name, rows_by_name),
        (self.rows_by_page_id, rows_by_page_id),
        (self.page_id_by_uuid, page_id_by_uuid),
    ):
        cache.clear()
        cache.update(fresh)

    return self.rows
|
|
402
|
+
|
|
403
|
+
def get_by_name(self, name: str) -> DataCatalogRow:
    """Return the cached row stored under the data unit name ``name``.

    Raises:
        KeyError: If no cached row has that name.
    """
    row = self.rows_by_name[name]
    return row
|
|
405
|
+
|
|
406
|
+
def get_by_id(self, id: str) -> DataCatalogRow:
    """Return the cached row stored under the data unit uuid ``id``.

    Raises:
        KeyError: If no cached row has that uuid.
    """
    row = self.rows_by_id[id]
    return row
|
|
408
|
+
|
|
409
|
+
def _get_by_page_id(self, page_id: str) -> DataCatalogRow:
|
|
410
|
+
return self.rows_by_page_id[page_id]
|
|
411
|
+
|
|
412
|
+
def update_page_by_uuid(self, uuid: str, data_unit_details: DataCatalogRow) -> None:
    """Rewrite the body of the page that belongs to ``uuid``.

    Picks the block builder matching ``data_unit_details.data_unit_type``
    and replaces the page content with the blocks it produces.

    Args:
        uuid: Data unit uuid used to look up the Notion page id.
        data_unit_details: Details rendered into the page body.
            NOTE(review): annotated as ``DataCatalogRow``, but the
            builders it is passed to are annotated with the
            ``*TypeDataUnitPageInfo`` types - confirm the intended type.

    Raises:
        KeyError: If ``uuid`` has no known page id.
        ValueError: If the data unit type is not entity, attribute or
            relation.
    """
    page_id = self.page_id_by_uuid[uuid]

    builders = (
        (DataUnitType.ENTITY, self._build_entity_page_blocks),
        (DataUnitType.ATTRIBUTE, self._build_attribute_page_blocks),
        (DataUnitType.RELATION, self._build_relation_page_blocks),
    )
    for unit_type, build_blocks in builders:
        if data_unit_details.data_unit_type == unit_type:
            self._overwrite_page_body(page_id, build_blocks(data_unit_details))
            return

    raise ValueError(
        f"Unsupported data unit type: {data_unit_details.data_unit_type}"
    )
|
|
428
|
+
|
|
429
|
+
def update_properties_by_uuid(
    self, uuid: str, data_catalog_row: DataCatalogRow
) -> None:
    """Push a row's properties to its Notion page and refresh the caches.

    After the page is updated, the previously cached row for that page is
    replaced by ``data_catalog_row`` in ``self.rows`` and in every index,
    so an update never leaves a duplicate row, a stale name key or an
    outdated page-id entry behind.

    Args:
        uuid: Data unit uuid currently associated with the page.
        data_catalog_row: New property values for the page.

    Raises:
        KeyError: If ``uuid`` has no known page id.
    """
    page_id = self.page_id_by_uuid[uuid]

    props = self._properties_from_row(data_catalog_row)
    self.notion.pages.update(page_id=page_id, properties=props)

    new_uuid = data_catalog_row.data_unit_uuid
    previous = self.rows_by_page_id.get(page_id)

    replaced = False
    if previous is not None:
        # Swap the old row object for the new one at the same position.
        for position, cached in enumerate(self.rows):
            if cached is previous:
                self.rows[position] = data_catalog_row
                replaced = True
                break
        # Drop the old name key, but only if it still points at this row.
        if self.rows_by_name.get(previous.data_unit_name) is previous:
            del self.rows_by_name[previous.data_unit_name]
    if not replaced:
        self.rows.append(data_catalog_row)

    # The update may have changed the uuid stored on the page.
    if new_uuid != uuid:
        self.rows_by_id.pop(uuid, None)
        self.page_id_by_uuid.pop(uuid, None)

    self.rows_by_id[new_uuid] = data_catalog_row
    self.rows_by_name[data_catalog_row.data_unit_name] = data_catalog_row
    self.rows_by_page_id[page_id] = data_catalog_row
    self.page_id_by_uuid[new_uuid] = page_id
|
|
440
|
+
|
|
441
|
+
def delete_by_id(self, data_unit_uuid: str) -> None:
    """Archive the Notion page of a data unit and reload the catalog.

    The page is looked up in Notion by its uuid property, archived, and
    the in-memory caches are then rebuilt from a fresh ``pull()``. The
    caches are emptied before reloading so the archived row cannot
    survive in them and no row ends up loaded twice.

    Args:
        data_unit_uuid: Uuid of the data unit to delete.

    Raises:
        KeyError: If no page with that uuid exists in the data source.
    """
    # Find the page by its external id.
    resp = self.notion.data_sources.query(
        data_source_id=self.dc_table_id,
        page_size=1,
        filter={
            "property": self.prop_data_unit_uuid,
            "rich_text": {"equals": data_unit_uuid},
        },
    )
    results = resp.get("results", [])
    if not results:
        raise KeyError(f"No row found for data_unit_uuid='{data_unit_uuid}'")
    page_id = results[0]["id"]

    # Notion has no hard delete here; the page is archived instead.
    self.notion.pages.update(page_id=page_id, archived=True)

    for cache in (
        self.rows,
        self.rows_by_id,
        self.rows_by_name,
        self.rows_by_page_id,
        self.page_id_by_uuid,
    ):
        cache.clear()
    self.pull()
|
|
460
|
+
|
|
461
|
+
def delete_by_page_id(self, page_id: str) -> None:
    """Archive a Notion page by its page id and reload the catalog.

    The in-memory caches are emptied before the fresh ``pull()`` so the
    archived row cannot survive in them and no row ends up loaded twice.

    Args:
        page_id: Id of the Notion page to archive.
    """
    # Notion has no hard delete here; the page is archived instead.
    self.notion.pages.update(page_id=page_id, archived=True)

    for cache in (
        self.rows,
        self.rows_by_id,
        self.rows_by_name,
        self.rows_by_page_id,
        self.page_id_by_uuid,
    ):
        cache.clear()
    self.pull()
|
|
466
|
+
|
|
467
|
+
def add_data_unit(self, data_catalog_row: DataCatalogRow) -> None:
    """Create a Notion page for a new data unit and cache the row.

    Args:
        data_catalog_row: Row to add; its uuid must not exist in the data
            source yet.

    Raises:
        KeyError: If a page with the same uuid already exists.
    """
    new_uuid = data_catalog_row.data_unit_uuid

    # Refuse to create a second page for a uuid Notion already knows.
    lookup = self.notion.data_sources.query(
        data_source_id=self.dc_table_id,
        page_size=1,
        filter={
            "property": self.prop_data_unit_uuid,
            "rich_text": {"equals": new_uuid},
        },
    )
    if lookup.get("results", []):
        raise KeyError(
            f"Data unit with id='{data_catalog_row.data_unit_uuid}' already exists. Use update instead."
        )

    # Create the page with properties only; the body is written separately.
    created = self.notion.pages.create(
        parent={"type": "data_source_id", "data_source_id": self.dc_table_id},
        properties=self._properties_from_row(data_catalog_row),
    )
    new_page_id = created["id"]

    # Register the row locally instead of re-pulling the whole table.
    self.rows.append(data_catalog_row)
    self.rows_by_id[new_uuid] = data_catalog_row
    self.rows_by_name[data_catalog_row.data_unit_name] = data_catalog_row
    self.rows_by_page_id[new_page_id] = data_catalog_row
    self.page_id_by_uuid[new_uuid] = new_page_id
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class NotionFormater:
    """Static helpers for reading and building Notion API JSON fragments.

    The ``_safe_*`` methods extract plain text from property values; the
    ``_rt_*`` methods build rich-text objects; the remaining methods build
    block payloads (heading, paragraph, bullet) or a table cell.
    """

    @staticmethod
    def _join_plain_text(items: Optional[list]) -> str:
        """Concatenate the ``plain_text`` of every rich-text item.

        A property value can consist of several rich-text items, so
        reading only the first one would truncate the text. Missing or
        empty ``plain_text`` values contribute nothing.
        """
        return "".join((item.get("plain_text") or "") for item in (items or []))

    @staticmethod
    def _safe_title(prop: dict) -> str:
        """Return the full plain text of a title property, or ``""``."""
        return NotionFormater._join_plain_text(prop.get("title"))

    @staticmethod
    def _safe_rich_text(prop: dict) -> str:
        """Return the full plain text of a rich-text property, or ``""``."""
        return NotionFormater._join_plain_text(prop.get("rich_text"))

    @staticmethod
    def _rt_text(text: str, url: Optional[str] = None) -> dict:
        """Build a text rich-text object, linked to ``url`` when given."""
        rt = {"type": "text", "text": {"content": text}}
        if url:
            rt["text"]["link"] = {"url": url}
        return rt

    @staticmethod
    def _rt_user_mention(user_id: str) -> dict:
        """Build a rich-text object mentioning the user ``user_id``."""
        return {"type": "mention", "mention": {"user": {"id": user_id}}}

    @staticmethod
    def _rt_page_mention(page_id: str) -> dict:
        """Build a rich-text object mentioning the page ``page_id``."""
        return {"type": "mention", "mention": {"page": {"id": page_id}}}

    @staticmethod
    def _h2(text: str) -> dict:
        """Build a level-2 heading block containing ``text``."""
        return {
            "object": "block",
            "type": "heading_2",
            "heading_2": {"rich_text": [NotionFormater._rt_text(text)]},
        }

    @staticmethod
    def _para(text: str) -> dict:
        """Build a paragraph block containing plain ``text``."""
        return {
            "object": "block",
            "type": "paragraph",
            "paragraph": {"rich_text": [NotionFormater._rt_text(text)]},
        }

    @staticmethod
    def _para_rich_text(rich_text: list[dict]) -> dict:
        """Build a paragraph block from ready-made rich-text objects."""
        return {
            "object": "block",
            "type": "paragraph",
            "paragraph": {"rich_text": rich_text},
        }

    @staticmethod
    def _bullet(rich_text: list[dict]) -> dict:
        """Build a bulleted-list item from ready-made rich-text objects."""
        return {
            "object": "block",
            "type": "bulleted_list_item",
            "bulleted_list_item": {"rich_text": rich_text},
        }

    @staticmethod
    def _cell(text: str) -> list[dict]:
        """Build a one-item rich-text list; ``None``/empty becomes ``""``."""
        return [{"type": "text", "text": {"content": text or ""}}]
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
## Oracle Data Modeler Integration
|
|
2
|
+
|
|
3
|
+
Parses Oracle Data Modeler (ODM) project exports and converts them into `dg_kit` core objects:
|
|
4
|
+
- Logical Model (`LogicalModel`)
|
|
5
|
+
- Business Information (`BusinessInformation`)
|
|
6
|
+
|
|
7
|
+
This integration is useful for extracting governance metadata from ODM and using it in CI checks, data catalog sync, or internal tooling.
|
|
8
|
+
|
|
9
|
+
## Requirements
|
|
10
|
+
- Oracle Data Modeler project in `.dmd` format
|
|
11
|
+
- The `.dmd` file must sit next to a folder with the same base name that contains the exported assets (ODM’s standard structure)
|
|
12
|
+
|
|
13
|
+
Expected layout:
|
|
14
|
+
```
|
|
15
|
+
MyModel.dmd
|
|
16
|
+
MyModel/
|
|
17
|
+
logical/
|
|
18
|
+
businessinfo/
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
```python
|
|
23
|
+
from dg_kit.integrations.odm.parser import ODMParser
|
|
24
|
+
|
|
25
|
+
parser = ODMParser("path/to/MyModel.dmd")
|
|
26
|
+
|
|
27
|
+
bi = parser.parse_bi()
|
|
28
|
+
lm = parser.parse_lm()
|
|
29
|
+
|
|
30
|
+
print(lm.version, len(lm.entities))
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Versioned Projects
|
|
34
|
+
If you keep multiple `.dmd` files in a folder (one per version), use `ODMVersionedProjectParser`:
|
|
35
|
+
```python
|
|
36
|
+
from dg_kit.integrations.odm.parser import ODMVersionedProjectParser
|
|
37
|
+
|
|
38
|
+
parser = ODMVersionedProjectParser("path/to/odm_versions_folder")
|
|
39
|
+
model = parser.get_model("MyModel")
|
|
40
|
+
bi = parser.get_bi("MyModel")
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Notes
|
|
44
|
+
- Business information includes documents, contacts, teams, emails, and URLs extracted from ODM.
|
|
45
|
+
- Logical model entities, attributes, and relations are built from ODM XML assets.
|
|
46
|
+
- Dynamic ODM properties are used for fields like `domain`, `pm_map`, and `master_source_systems`.
|
|
File without changes
|