acryl-datahub 1.0.0rc9__py3-none-any.whl → 1.0.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/METADATA +2445 -2446
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/RECORD +36 -35
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/emitter/mce_builder.py +28 -13
- datahub/ingestion/source/common/subtypes.py +7 -0
- datahub/ingestion/source/iceberg/iceberg_common.py +40 -1
- datahub/ingestion/source/identity/okta.py +22 -0
- datahub/ingestion/source/metabase.py +3 -3
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/mode.py +1 -1
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +7 -4
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +125 -33
- datahub/ingestion/source/redshift/redshift.py +41 -72
- datahub/ingestion/source/redshift/redshift_schema.py +166 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +10 -4
- datahub/ingestion/source/sql/oracle.py +93 -63
- datahub/metadata/_schema_classes.py +5 -5
- datahub/metadata/_urns/urn_defs.py +24 -0
- datahub/metadata/schema.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/redshift/redshift_schema.py
CHANGED
@@ -1,7 +1,8 @@
 import logging
+import re
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
-from typing import Dict, Iterable, List, Optional, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple, Union

 import redshift_connector

@@ -41,6 +42,10 @@ class RedshiftTable(BaseTable):
     serde_parameters: Optional[str] = None
     last_altered: Optional[datetime] = None

+    @property
+    def is_external_table(self) -> bool:
+        return self.type == "EXTERNAL_TABLE"
+

 @dataclass
 class RedshiftView(BaseTable):
@@ -51,6 +56,10 @@ class RedshiftView(BaseTable):
     size_in_bytes: Optional[int] = None
     rows_count: Optional[int] = None

+    @property
+    def is_external_table(self) -> bool:
+        return self.type == "EXTERNAL_TABLE"
+

 @dataclass
 class RedshiftSchema:
@@ -59,8 +68,102 @@ class RedshiftSchema:
     type: str
     owner: Optional[str] = None
     option: Optional[str] = None
+    external_platform: Optional[str] = None
     external_database: Optional[str] = None

+    @property
+    def is_external_schema(self) -> bool:
+        return self.type == "external"
+
+
+@dataclass
+class PartialInboundDatashare:
+    share_name: str
+    producer_namespace_prefix: str
+    consumer_database: str
+
+    def get_description(self) -> str:
+        return (
+            f"Namespace Prefix {self.producer_namespace_prefix} Share {self.share_name}"
+        )
+
+
+@dataclass
+class OutboundDatashare:
+    share_name: str
+    producer_namespace: str
+    source_database: str
+
+    def get_key(self) -> str:
+        return f"{self.producer_namespace}.{self.share_name}"
+
+
+@dataclass
+class InboundDatashare:
+    share_name: str
+    producer_namespace: str
+    consumer_database: str
+
+    def get_key(self) -> str:
+        return f"{self.producer_namespace}.{self.share_name}"
+
+    def get_description(self) -> str:
+        return f"Namespace {self.producer_namespace} Share {self.share_name}"
+
+
+@dataclass
+class RedshiftDatabase:
+    name: str
+    type: str
+    options: Optional[str] = None
+
+    @property
+    def is_shared_database(self) -> bool:
+        return self.type == "shared"
+
+    # NOTE: ideally options are in form
+    # {"datashare_name":"xxx","datashare_producer_account":"1234","datashare_producer_namespace":"yyy"}
+    # however due to varchar(128) type of database table that captures options
+    # we may receive only partial information about inbound share
+    def get_inbound_share(
+        self,
+    ) -> Optional[Union[InboundDatashare, PartialInboundDatashare]]:
+        if not self.is_shared_database or not self.options:
+            return None
+
+        # Convert into single regex ??
+        share_name_match = re.search(r'"datashare_name"\s*:\s*"([^"]*)"', self.options)
+        namespace_match = re.search(
+            r'"datashare_producer_namespace"\s*:\s*"([^"]*)"', self.options
+        )
+        partial_namespace_match = re.search(
+            r'"datashare_producer_namespace"\s*:\s*"([^"]*)$', self.options
+        )
+
+        if not share_name_match:
+            # We will always at least get share name
+            return None
+
+        share_name = share_name_match.group(1)
+        if namespace_match:
+            return InboundDatashare(
+                share_name=share_name,
+                producer_namespace=namespace_match.group(1),
+                consumer_database=self.name,
+            )
+        elif partial_namespace_match:
+            return PartialInboundDatashare(
+                share_name=share_name,
+                producer_namespace_prefix=partial_namespace_match.group(1),
+                consumer_database=self.name,
+            )
+        else:
+            return PartialInboundDatashare(
+                share_name=share_name,
+                producer_namespace_prefix="",
+                consumer_database=self.name,
+            )
+

 @dataclass
 class RedshiftExtraTableMeta:
@@ -141,13 +244,31 @@ class RedshiftDataDictionary:

         return [db[0] for db in dbs]

+    @staticmethod
+    def get_database_details(
+        conn: redshift_connector.Connection, database: str
+    ) -> Optional[RedshiftDatabase]:
+        cursor = RedshiftDataDictionary.get_query_result(
+            conn,
+            RedshiftCommonQuery.get_database_details(database),
+        )
+
+        row = cursor.fetchone()
+        if row is None:
+            return None
+        return RedshiftDatabase(
+            name=database,
+            type=row[1],
+            options=row[2],
+        )
+
     @staticmethod
     def get_schemas(
         conn: redshift_connector.Connection, database: str
     ) -> List[RedshiftSchema]:
         cursor = RedshiftDataDictionary.get_query_result(
             conn,
-            RedshiftCommonQuery.list_schemas
+            RedshiftCommonQuery.list_schemas(database),
         )

         schemas = cursor.fetchall()
@@ -158,8 +279,8 @@ class RedshiftDataDictionary:
                 database=database,
                 name=schema[field_names.index("schema_name")],
                 type=schema[field_names.index("schema_type")],
-                owner=schema[field_names.index("schema_owner_name")],
                 option=schema[field_names.index("schema_option")],
+                external_platform=schema[field_names.index("external_platform")],
                 external_database=schema[field_names.index("external_database")],
             )
             for schema in schemas
@@ -203,6 +324,7 @@ class RedshiftDataDictionary:
         self,
         conn: redshift_connector.Connection,
         skip_external_tables: bool = False,
+        is_shared_database: bool = False,
     ) -> Tuple[Dict[str, List[RedshiftTable]], Dict[str, List[RedshiftView]]]:
         tables: Dict[str, List[RedshiftTable]] = {}
         views: Dict[str, List[RedshiftView]] = {}
@@ -213,7 +335,10 @@ class RedshiftDataDictionary:

         cur = RedshiftDataDictionary.get_query_result(
             conn,
-            RedshiftCommonQuery.list_tables(skip_external_tables=skip_external_tables),
+            RedshiftCommonQuery.list_tables(
+                skip_external_tables=skip_external_tables,
+                is_shared_database=is_shared_database,
+            ),
         )
         field_names = [i[0] for i in cur.description]
         db_tables = cur.fetchall()
@@ -358,11 +483,15 @@ class RedshiftDataDictionary:

     @staticmethod
     def get_columns_for_schema(
-        conn: redshift_connector.Connection, schema: RedshiftSchema
+        conn: redshift_connector.Connection,
+        schema: RedshiftSchema,
+        is_shared_database: bool = False,
     ) -> Dict[str, List[RedshiftColumn]]:
         cursor = RedshiftDataDictionary.get_query_result(
             conn,
-            RedshiftCommonQuery.list_columns.format(schema_name=schema.name),
+            RedshiftCommonQuery.list_columns(
+                is_shared_database=is_shared_database
+            ).format(schema_name=schema.name),
         )

         table_columns: Dict[str, List[RedshiftColumn]] = {}
@@ -508,3 +637,34 @@ class RedshiftDataDictionary:
             start_time=row[field_names.index("start_time")],
         )
         rows = cursor.fetchmany()
+
+    @staticmethod
+    def get_outbound_datashares(
+        conn: redshift_connector.Connection,
+    ) -> Iterable[OutboundDatashare]:
+        cursor = conn.cursor()
+        cursor.execute(RedshiftCommonQuery.list_outbound_datashares())
+        for item in cursor.fetchall():
+            yield OutboundDatashare(
+                share_name=item[1],
+                producer_namespace=item[2],
+                source_database=item[3],
+            )
+
+    # NOTE: this is not used right now as it requires superuser privilege
+    # We can use this in future if the permissions are lowered.
+    @staticmethod
+    def get_inbound_datashare(
+        conn: redshift_connector.Connection,
+        database: str,
+    ) -> Optional[InboundDatashare]:
+        cursor = conn.cursor()
+        cursor.execute(RedshiftCommonQuery.get_inbound_datashare(database))
+        item = cursor.fetchone()
+        if item:
+            return InboundDatashare(
+                share_name=item[1],
+                producer_namespace=item[2],
+                consumer_database=item[3],
+            )
+        return None
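The interesting piece above is RedshiftDatabase.get_inbound_share, which tolerates options strings truncated by the varchar(128) column they are read from. A minimal usage sketch, assuming the classes above are imported from the changed module; the share and namespace values are illustrative:

    from datahub.ingestion.source.redshift.redshift_schema import RedshiftDatabase

    # Fully captured options: the producer namespace is complete and quoted.
    full = RedshiftDatabase(
        name="sales_consumer",
        type="shared",
        options='{"datashare_name":"sales_share",'
        '"datashare_producer_namespace":"a1b2c3d4-5678-90ab-cdef-111122223333"}',
    )
    full.get_inbound_share()
    # -> InboundDatashare(share_name='sales_share',
    #        producer_namespace='a1b2c3d4-5678-90ab-cdef-111122223333',
    #        consumer_database='sales_consumer')

    # Truncated options: the namespace lost its closing quote at the 128-char
    # boundary, so only a prefix survives and the partial variant is returned.
    partial = RedshiftDatabase(
        name="sales_consumer",
        type="shared",
        options='{"datashare_name":"sales_share","datashare_producer_namespace":"a1b2',
    )
    partial.get_inbound_share()
    # -> PartialInboundDatashare(share_name='sales_share',
    #        producer_namespace_prefix='a1b2', consumer_database='sales_consumer')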
datahub/ingestion/source/redshift/report.py
CHANGED
@@ -60,5 +60,8 @@ class RedshiftReport(
     sql_aggregator: Optional[SqlAggregatorReport] = None
     lineage_phases_timer: Dict[str, PerfTimer] = field(default_factory=dict)

+    is_shared_database: bool = False
+    outbound_shares_count: Optional[int] = None
+
     def report_dropped(self, key: str) -> None:
         self.filtered.append(key)
datahub/ingestion/source/sql/mssql/job_models.py
CHANGED
@@ -11,12 +11,17 @@ from datahub.emitter.mcp_builder import (
     DatabaseKey,
     SchemaKey,
 )
+from datahub.ingestion.source.common.subtypes import (
+    FlowContainerSubTypes,
+    JobContainerSubTypes,
+)
 from datahub.metadata.schema_classes import (
     ContainerClass,
     DataFlowInfoClass,
     DataJobInfoClass,
     DataJobInputOutputClass,
     DataPlatformInstanceClass,
+    SubTypesClass,
 )


@@ -211,6 +216,18 @@ class MSSQLDataJob:
             status=self.status,
         )

+    @property
+    def as_subtypes_aspect(self) -> SubTypesClass:
+        assert isinstance(self.entity, (JobStep, StoredProcedure))
+        type = (
+            JobContainerSubTypes.MSSQL_JOBSTEP
+            if isinstance(self.entity, JobStep)
+            else JobContainerSubTypes.MSSQL_STORED_PROCEDURE
+        )
+        return SubTypesClass(
+            typeNames=[type],
+        )
+
     @property
     def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
         if self.entity.flow.platform_instance:
@@ -276,6 +293,18 @@ class MSSQLDataFlow:
             externalUrl=self.external_url,
         )

+    @property
+    def as_subtypes_aspect(self) -> SubTypesClass:
+        assert isinstance(self.entity, (MSSQLJob, MSSQLProceduresContainer))
+        type = (
+            FlowContainerSubTypes.MSSQL_JOB
+            if isinstance(self.entity, MSSQLJob)
+            else FlowContainerSubTypes.MSSQL_PROCEDURE_CONTAINER
+        )
+        return SubTypesClass(
+            typeNames=[type],
+        )
+
     @property
     def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
         if self.entity.platform_instance:
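Both new as_subtypes_aspect properties reduce to a plain SubTypesClass that the MSSQL source wraps in an MCP (see the source.py hunks below). A minimal sketch of the equivalent emission; the URN is illustrative, and the literal "Job Step" is an assumed stand-in for JobContainerSubTypes.MSSQL_JOBSTEP, whose actual value lives in common/subtypes.py:

    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.metadata.schema_classes import SubTypesClass

    # Illustrative job URN; the subtype literal below is an assumption.
    mcp = MetadataChangeProposalWrapper(
        entityUrn="urn:li:dataJob:(urn:li:dataFlow:(mssql,my_db.my_job,PROD),step_1)",
        aspect=SubTypesClass(typeNames=["Job Step"]),
    )
    workunit = mcp.as_workunit()  # same shape the source yields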
datahub/ingestion/source/sql/mssql/source.py
CHANGED
@@ -638,6 +638,11 @@ class SQLServerSource(SQLAlchemySource):
             aspect=data_job.as_datajob_info_aspect,
         ).as_workunit()

+        yield MetadataChangeProposalWrapper(
+            entityUrn=data_job.urn,
+            aspect=data_job.as_subtypes_aspect,
+        ).as_workunit()
+
         data_platform_instance_aspect = data_job.as_maybe_platform_instance_aspect
         if data_platform_instance_aspect:
             yield MetadataChangeProposalWrapper(
@@ -676,8 +681,6 @@ class SQLServerSource(SQLAlchemySource):
             ),
         ).as_workunit()

-        # TODO: Add SubType when it appear
-
     def construct_flow_workunits(
         self,
         data_flow: MSSQLDataFlow,
@@ -687,6 +690,11 @@ class SQLServerSource(SQLAlchemySource):
             aspect=data_flow.as_dataflow_info_aspect,
         ).as_workunit()

+        yield MetadataChangeProposalWrapper(
+            entityUrn=data_flow.urn,
+            aspect=data_flow.as_subtypes_aspect,
+        ).as_workunit()
+
         data_platform_instance_aspect = data_flow.as_maybe_platform_instance_aspect
         if data_platform_instance_aspect:
             yield MetadataChangeProposalWrapper(
@@ -700,8 +708,6 @@ class SQLServerSource(SQLAlchemySource):
             aspect=data_flow.as_container_aspect,
         ).as_workunit()

-        # TODO: Add SubType when it appear
-
     def get_inspectors(self) -> Iterable[Inspector]:
         # This method can be overridden in the case that you want to dynamically
         # run on multiple databases.
datahub/ingestion/source/sql/oracle.py
CHANGED
@@ -152,6 +152,7 @@ class OracleInspectorObjectWrapper:
         self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX")

     def get_db_name(self) -> str:
+        db_name = None
         try:
             # Try to retrieve current DB name by executing query
             db_name = self._inspector_instance.bind.execute(
@@ -159,7 +160,12 @@ class OracleInspectorObjectWrapper:
             ).scalar()
             return str(db_name)
         except sqlalchemy.exc.DatabaseError as e:
-
+            self.report.failure(
+                title="Error fetching database name using sys_context.",
+                message="database_fetch_error",
+                context=db_name,
+                exc=e,
+            )
             return ""

     def get_schema_names(self) -> List[str]:
@@ -326,8 +332,8 @@ class OracleInspectorObjectWrapper:
             try:
                 coltype = ischema_names[coltype]()
             except KeyError:
-                logger.
-                    f"
+                logger.info(
+                    f"Unrecognized column datatype {coltype} of column {colname}"
                 )
                 coltype = sqltypes.NULLTYPE

@@ -379,8 +385,8 @@ class OracleInspectorObjectWrapper:
         COMMENT_SQL = """
             SELECT comments
             FROM dba_tab_comments
-            WHERE table_name =
-            AND owner =
+            WHERE table_name = :table_name
+            AND owner = :schema_name
         """

         c = self._inspector_instance.bind.execute(
@@ -397,79 +403,93 @@ class OracleInspectorObjectWrapper:

         text = (
             "SELECT"
-            "\nac.constraint_name,"
-            "\nac.constraint_type,"
-            "\
-            "\
-            "\
-            "\
-            "\
-            "\
-            "\nac.search_condition,"
-            "\nac.delete_rule"
-            "\nFROM dba_constraints
-            "\
-            "\
-            "\
-            "\nAND ac.
+            "\nac.constraint_name,"
+            "\nac.constraint_type,"
+            "\nacc.column_name AS local_column,"
+            "\nNULL AS remote_table,"
+            "\nNULL AS remote_column,"
+            "\nNULL AS remote_owner,"
+            "\nacc.position AS loc_pos,"
+            "\nNULL AS rem_pos,"
+            "\nac.search_condition,"
+            "\nac.delete_rule"
+            "\nFROM dba_constraints ac"
+            "\nJOIN dba_cons_columns acc"
+            "\nON ac.owner = acc.owner"
+            "\nAND ac.constraint_name = acc.constraint_name"
+            "\nAND ac.table_name = acc.table_name"
+            "\nWHERE ac.table_name = :table_name"
+            "\nAND ac.constraint_type IN ('P', 'U', 'C')"
         )

         if schema is not None:
             params["owner"] = schema
-            text += "\nAND ac.owner =
+            text += "\nAND ac.owner = :owner"

+        # Splitting into queries with UNION ALL for execution efficiency
         text += (
-            "\
-            "\
-            "\
-            "\
-            "\
-            "\
+            "\nUNION ALL"
+            "\nSELECT"
+            "\nac.constraint_name,"
+            "\nac.constraint_type,"
+            "\nacc.column_name AS local_column,"
+            "\nac.r_table_name AS remote_table,"
+            "\nrcc.column_name AS remote_column,"
+            "\nac.r_owner AS remote_owner,"
+            "\nacc.position AS loc_pos,"
+            "\nrcc.position AS rem_pos,"
+            "\nac.search_condition,"
+            "\nac.delete_rule"
+            "\nFROM dba_constraints ac"
+            "\nJOIN dba_cons_columns acc"
+            "\nON ac.owner = acc.owner"
+            "\nAND ac.constraint_name = acc.constraint_name"
+            "\nAND ac.table_name = acc.table_name"
+            "\nLEFT JOIN dba_cons_columns rcc"
+            "\nON ac.r_owner = rcc.owner"
+            "\nAND ac.r_constraint_name = rcc.constraint_name"
+            "\nAND acc.position = rcc.position"
+            "\nWHERE ac.table_name = :table_name"
+            "\nAND ac.constraint_type = 'R'"
         )

-
+        if schema is not None:
+            text += "\nAND ac.owner = :owner"
+
+        text += "\nORDER BY constraint_name, loc_pos"
+
         rp = self._inspector_instance.bind.execute(sql.text(text), params)
-
-        return constraint_data
+        return rp.fetchall()

     def get_pk_constraint(
         self, table_name: str, schema: Optional[str] = None, dblink: str = ""
     ) -> Dict:
-        denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
-            table_name
-        )
-        assert denormalized_table_name
-
-        schema = self._inspector_instance.dialect.denormalize_name(
-            schema or self.default_schema_name
-        )
-
-        if schema is None:
-            schema = self._inspector_instance.dialect.default_schema_name
-
         pkeys = []
         constraint_name = None
-        constraint_data = self._get_constraint_data(
-            denormalized_table_name, schema, dblink
-        )

-
-        (
-
-
-
-
-
-
-
-
+        try:
+            for row in self._get_constraint_data(table_name, schema, dblink):
+                if row[1] == "P":  # constraint_type is 'P' for primary key
+                    if constraint_name is None:
+                        constraint_name = (
+                            self._inspector_instance.dialect.normalize_name(row[0])
+                        )
+                    col_name = self._inspector_instance.dialect.normalize_name(
+                        row[2]
+                    )  # local_column
+                    pkeys.append(col_name)
+        except Exception as e:
+            self.report.warning(
+                title="Failed to Process Primary Keys",
+                message=(
+                    f"Unable to process primary key constraints for {schema}.{table_name}. "
+                    "Ensure SELECT access on DBA_CONSTRAINTS and DBA_CONS_COLUMNS."
+                ),
+                context=f"{schema}.{table_name}",
+                exc=e,
             )
-            if
-
-                constraint_name = self._inspector_instance.dialect.normalize_name(
-                    cons_name
-                )
-            pkeys.append(local_column)
+            # Return empty constraint if we can't process it
+            return {"constrained_columns": [], "name": None}

         return {"constrained_columns": pkeys, "name": constraint_name}

@@ -527,6 +547,16 @@ class OracleInspectorObjectWrapper:
                     f"dba_cons_columns{dblink} - does the user have "
                     "proper rights to the table?"
                 )
+                self.report.warning(
+                    title="Missing Table Permissions",
+                    message=(
+                        f"Unable to query table_name from dba_cons_columns{dblink}. "
+                        "This usually indicates insufficient permissions on the target table. "
+                        f"Foreign key relationships will not be detected for {schema}.{table_name}. "
+                        "Please ensure the user has SELECT privileges on dba_cons_columns."
+                    ),
+                    context=f"{schema}.{table_name}",
+                )

                 rec = fkeys[cons_name]
                 rec["name"] = cons_name
@@ -573,8 +603,8 @@ class OracleInspectorObjectWrapper:
         text = "SELECT text FROM dba_views WHERE view_name=:view_name"

         if schema is not None:
-
-
+            params["owner"] = schema
+            text += "\nAND owner = :owner"

         rp = self._inspector_instance.bind.execute(sql.text(text), params).scalar()
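A recurring theme in these hunks is moving from partially interpolated SQL to SQLAlchemy bind parameters (:table_name, :owner, :schema_name). A minimal standalone sketch of that pattern, assuming a reachable Oracle database; the DSN, table, and schema values are placeholders:

    from sqlalchemy import create_engine, sql

    engine = create_engine("oracle+cx_oracle://user:pass@host:1521/?service_name=orcl")
    query = (
        "SELECT comments FROM dba_tab_comments"
        "\nWHERE table_name = :table_name"
        "\nAND owner = :schema_name"
    )
    with engine.connect() as conn:
        # Values travel separately from the statement, so identifiers are
        # never spliced into the SQL string.
        comment = conn.execute(
            sql.text(query), {"table_name": "ORDERS", "schema_name": "SALES"}
        ).scalar()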
datahub/metadata/_schema_classes.py
CHANGED
@@ -15486,7 +15486,7 @@ class DomainKeyClass(_Aspect):


     ASPECT_NAME = 'domainKey'
-    ASPECT_INFO = {'keyForEntity': 'domain', 'entityCategory': 'core', 'entityAspects': ['domainProperties', 'institutionalMemory', 'ownership', 'structuredProperties', 'forms', 'testResults'], 'entityDoc': 'A data domain within an organization.'}
+    ASPECT_INFO = {'keyForEntity': 'domain', 'entityCategory': 'core', 'entityAspects': ['domainProperties', 'institutionalMemory', 'ownership', 'structuredProperties', 'forms', 'testResults', 'displayProperties'], 'entityDoc': 'A data domain within an organization.'}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DomainKey")

     def __init__(self,
@@ -15631,7 +15631,7 @@ class GlossaryNodeKeyClass(_Aspect):


     ASPECT_NAME = 'glossaryNodeKey'
-    ASPECT_INFO = {'keyForEntity': 'glossaryNode', 'entityCategory': 'core', 'entityAspects': ['glossaryNodeInfo', 'institutionalMemory', 'ownership', 'status', 'structuredProperties', 'forms', 'testResults', 'subTypes']}
+    ASPECT_INFO = {'keyForEntity': 'glossaryNode', 'entityCategory': 'core', 'entityAspects': ['glossaryNodeInfo', 'institutionalMemory', 'ownership', 'status', 'structuredProperties', 'forms', 'testResults', 'subTypes', 'displayProperties']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.GlossaryNodeKey")

     def __init__(self,
@@ -15831,7 +15831,7 @@ class MLModelDeploymentKeyClass(_Aspect):


     ASPECT_NAME = 'mlModelDeploymentKey'
-    ASPECT_INFO = {'keyForEntity': 'mlModelDeployment', 'entityCategory': 'core', 'entityAspects': ['mlModelDeploymentProperties', 'ownership', 'status', 'deprecation', 'globalTags', 'dataPlatformInstance', 'testResults']}
+    ASPECT_INFO = {'keyForEntity': 'mlModelDeployment', 'entityCategory': 'core', 'entityAspects': ['mlModelDeploymentProperties', 'ownership', 'status', 'deprecation', 'globalTags', 'dataPlatformInstance', 'testResults', 'container']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelDeploymentKey")

     def __init__(self,
@@ -15886,7 +15886,7 @@ class MLModelGroupKeyClass(_Aspect):


     ASPECT_NAME = 'mlModelGroupKey'
-    ASPECT_INFO = {'keyForEntity': 'mlModelGroup', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelGroupProperties', 'domains', 'mlModelGroupProperties', 'ownership', 'status', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'subTypes']}
+    ASPECT_INFO = {'keyForEntity': 'mlModelGroup', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelGroupProperties', 'domains', 'mlModelGroupProperties', 'ownership', 'status', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'subTypes', 'container']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelGroupKey")

     def __init__(self,
@@ -15941,7 +15941,7 @@ class MLModelKeyClass(_Aspect):


     ASPECT_NAME = 'mlModelKey'
-    ASPECT_INFO = {'keyForEntity': 'mlModel', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelProperties', 'domains', 'ownership', 'mlModelProperties', 'intendedUse', 'mlModelFactorPrompts', 'mlModelMetrics', 'mlModelEvaluationData', 'mlModelTrainingData', 'mlModelQuantitativeAnalyses', 'mlModelEthicalConsiderations', 'mlModelCaveatsAndRecommendations', 'institutionalMemory', 'sourceCode', 'status', 'cost', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'versionProperties', 'subTypes']}
+    ASPECT_INFO = {'keyForEntity': 'mlModel', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelProperties', 'domains', 'ownership', 'mlModelProperties', 'intendedUse', 'mlModelFactorPrompts', 'mlModelMetrics', 'mlModelEvaluationData', 'mlModelTrainingData', 'mlModelQuantitativeAnalyses', 'mlModelEthicalConsiderations', 'mlModelCaveatsAndRecommendations', 'institutionalMemory', 'sourceCode', 'status', 'cost', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'versionProperties', 'subTypes', 'container']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelKey")

     def __init__(self,
datahub/metadata/_urns/urn_defs.py
CHANGED
@@ -788,6 +788,18 @@ class ChartUrn(_SpecificUrn):
     def from_key_aspect(cls, key_aspect: "ChartKeyClass") -> "ChartUrn":
         return cls(dashboard_tool=key_aspect.dashboardTool, chart_id=key_aspect.chartId)

+    @classmethod
+    def create_from_ids(
+        cls,
+        platform: str,
+        name: str,
+        platform_instance: Optional[str] = None,
+    ) -> "ChartUrn":
+        return ChartUrn(
+            dashboard_tool=platform,
+            chart_id=f"{platform_instance}.{name}" if platform_instance else name,
+        )
+
     @property
     def dashboard_tool(self) -> str:
         return self._entity_ids[0]
@@ -1480,6 +1492,18 @@ class DashboardUrn(_SpecificUrn):
     def from_key_aspect(cls, key_aspect: "DashboardKeyClass") -> "DashboardUrn":
         return cls(dashboard_tool=key_aspect.dashboardTool, dashboard_id=key_aspect.dashboardId)

+    @classmethod
+    def create_from_ids(
+        cls,
+        platform: str,
+        name: str,
+        platform_instance: Optional[str] = None,
+    ) -> "DashboardUrn":
+        return DashboardUrn(
+            dashboard_tool=platform,
+            dashboard_id=f"{platform_instance}.{name}" if platform_instance else name,
+        )
+
     @property
     def dashboard_tool(self) -> str:
         return self._entity_ids[0]
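A quick usage sketch of the new create_from_ids helpers, assuming the usual re-export through datahub.metadata.urns; platform and entity names are illustrative:

    from datahub.metadata.urns import ChartUrn, DashboardUrn

    DashboardUrn.create_from_ids(platform="looker", name="sales_overview")
    # -> urn:li:dashboard:(looker,sales_overview)

    # With a platform instance, the instance is prefixed onto the entity id.
    ChartUrn.create_from_ids(
        platform="looker", name="revenue_chart", platform_instance="emea"
    )
    # -> urn:li:chart:(looker,emea.revenue_chart)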
datahub/metadata/schema.avsc
CHANGED
@@ -15636,7 +15636,8 @@
             "ownership",
             "structuredProperties",
             "forms",
-            "testResults"
+            "testResults",
+            "displayProperties"
         ],
         "entityDoc": "A data domain within an organization."
     },