acryl-datahub 1.0.0rc9__py3-none-any.whl → 1.0.0rc11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (36)
  1. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/METADATA +2445 -2446
  2. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/RECORD +36 -35
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/common/serialized_value.py +4 -3
  5. datahub/emitter/mce_builder.py +28 -13
  6. datahub/ingestion/source/common/subtypes.py +7 -0
  7. datahub/ingestion/source/iceberg/iceberg_common.py +40 -1
  8. datahub/ingestion/source/identity/okta.py +22 -0
  9. datahub/ingestion/source/metabase.py +3 -3
  10. datahub/ingestion/source/metadata/business_glossary.py +45 -3
  11. datahub/ingestion/source/mode.py +1 -1
  12. datahub/ingestion/source/redshift/config.py +4 -0
  13. datahub/ingestion/source/redshift/datashares.py +236 -0
  14. datahub/ingestion/source/redshift/lineage.py +6 -2
  15. datahub/ingestion/source/redshift/lineage_v2.py +7 -4
  16. datahub/ingestion/source/redshift/profile.py +1 -1
  17. datahub/ingestion/source/redshift/query.py +125 -33
  18. datahub/ingestion/source/redshift/redshift.py +41 -72
  19. datahub/ingestion/source/redshift/redshift_schema.py +166 -6
  20. datahub/ingestion/source/redshift/report.py +3 -0
  21. datahub/ingestion/source/sql/mssql/job_models.py +29 -0
  22. datahub/ingestion/source/sql/mssql/source.py +10 -4
  23. datahub/ingestion/source/sql/oracle.py +93 -63
  24. datahub/metadata/_schema_classes.py +5 -5
  25. datahub/metadata/_urns/urn_defs.py +24 -0
  26. datahub/metadata/schema.avsc +2 -1
  27. datahub/metadata/schemas/DomainKey.avsc +2 -1
  28. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  29. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
  30. datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
  31. datahub/metadata/schemas/MLModelKey.avsc +2 -1
  32. datahub/sql_parsing/sql_parsing_common.py +7 -0
  33. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/LICENSE +0 -0
  34. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/WHEEL +0 -0
  35. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/entry_points.txt +0 -0
  36. {acryl_datahub-1.0.0rc9.dist-info → acryl_datahub-1.0.0rc11.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/redshift/redshift_schema.py

@@ -1,7 +1,8 @@
 import logging
+import re
 from dataclasses import dataclass, field
 from datetime import datetime, timezone
-from typing import Dict, Iterable, List, Optional, Tuple
+from typing import Dict, Iterable, List, Optional, Tuple, Union
 
 import redshift_connector
 
@@ -41,6 +42,10 @@ class RedshiftTable(BaseTable):
     serde_parameters: Optional[str] = None
     last_altered: Optional[datetime] = None
 
+    @property
+    def is_external_table(self) -> bool:
+        return self.type == "EXTERNAL_TABLE"
+
 
 @dataclass
 class RedshiftView(BaseTable):
@@ -51,6 +56,10 @@ class RedshiftView(BaseTable):
     size_in_bytes: Optional[int] = None
     rows_count: Optional[int] = None
 
+    @property
+    def is_external_table(self) -> bool:
+        return self.type == "EXTERNAL_TABLE"
+
 
 @dataclass
 class RedshiftSchema:
@@ -59,8 +68,102 @@ class RedshiftSchema:
     type: str
     owner: Optional[str] = None
     option: Optional[str] = None
+    external_platform: Optional[str] = None
     external_database: Optional[str] = None
 
+    @property
+    def is_external_schema(self) -> bool:
+        return self.type == "external"
+
+
+@dataclass
+class PartialInboundDatashare:
+    share_name: str
+    producer_namespace_prefix: str
+    consumer_database: str
+
+    def get_description(self) -> str:
+        return (
+            f"Namespace Prefix {self.producer_namespace_prefix} Share {self.share_name}"
+        )
+
+
+@dataclass
+class OutboundDatashare:
+    share_name: str
+    producer_namespace: str
+    source_database: str
+
+    def get_key(self) -> str:
+        return f"{self.producer_namespace}.{self.share_name}"
+
+
+@dataclass
+class InboundDatashare:
+    share_name: str
+    producer_namespace: str
+    consumer_database: str
+
+    def get_key(self) -> str:
+        return f"{self.producer_namespace}.{self.share_name}"
+
+    def get_description(self) -> str:
+        return f"Namespace {self.producer_namespace} Share {self.share_name}"
+
+
+@dataclass
+class RedshiftDatabase:
+    name: str
+    type: str
+    options: Optional[str] = None
+
+    @property
+    def is_shared_database(self) -> bool:
+        return self.type == "shared"
+
+    # NOTE: ideally options are in form
+    # {"datashare_name":"xxx","datashare_producer_account":"1234","datashare_producer_namespace":"yyy"}
+    # however due to varchar(128) type of database table that captures options
+    # we may receive only partial information about inbound share
+    def get_inbound_share(
+        self,
+    ) -> Optional[Union[InboundDatashare, PartialInboundDatashare]]:
+        if not self.is_shared_database or not self.options:
+            return None
+
+        # Convert into single regex ??
+        share_name_match = re.search(r'"datashare_name"\s*:\s*"([^"]*)"', self.options)
+        namespace_match = re.search(
+            r'"datashare_producer_namespace"\s*:\s*"([^"]*)"', self.options
+        )
+        partial_namespace_match = re.search(
+            r'"datashare_producer_namespace"\s*:\s*"([^"]*)$', self.options
+        )
+
+        if not share_name_match:
+            # We will always at least get share name
+            return None
+
+        share_name = share_name_match.group(1)
+        if namespace_match:
+            return InboundDatashare(
+                share_name=share_name,
+                producer_namespace=namespace_match.group(1),
+                consumer_database=self.name,
+            )
+        elif partial_namespace_match:
+            return PartialInboundDatashare(
+                share_name=share_name,
+                producer_namespace_prefix=partial_namespace_match.group(1),
+                consumer_database=self.name,
+            )
+        else:
+            return PartialInboundDatashare(
+                share_name=share_name,
+                producer_namespace_prefix="",
+                consumer_database=self.name,
+            )
+
 
 @dataclass
 class RedshiftExtraTableMeta:
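
To illustrate the truncation handling described in the NOTE above, here is a minimal sketch (not part of the package; the share and namespace values are invented) of how get_inbound_share() degrades from a full InboundDatashare to a PartialInboundDatashare when the options string is cut off at 128 characters:

from datahub.ingestion.source.redshift.redshift_schema import RedshiftDatabase

# Complete options string: both keys parse, so a full InboundDatashare is returned.
complete = RedshiftDatabase(
    name="consumer_db",
    type="shared",
    options='{"datashare_name":"sales_share","datashare_producer_namespace":"a1b2c3d4"}',
)
print(complete.get_inbound_share())
# InboundDatashare(share_name='sales_share', producer_namespace='a1b2c3d4', consumer_database='consumer_db')

# Truncated options string: the namespace value has no closing quote, so only a
# PartialInboundDatashare carrying a namespace prefix can be recovered.
truncated = RedshiftDatabase(
    name="consumer_db",
    type="shared",
    options='{"datashare_name":"sales_share","datashare_producer_namespace":"a1b2',
)
print(truncated.get_inbound_share())
# PartialInboundDatashare(share_name='sales_share', producer_namespace_prefix='a1b2', consumer_database='consumer_db')
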
@@ -141,13 +244,31 @@ class RedshiftDataDictionary:
 
         return [db[0] for db in dbs]
 
+    @staticmethod
+    def get_database_details(
+        conn: redshift_connector.Connection, database: str
+    ) -> Optional[RedshiftDatabase]:
+        cursor = RedshiftDataDictionary.get_query_result(
+            conn,
+            RedshiftCommonQuery.get_database_details(database),
+        )
+
+        row = cursor.fetchone()
+        if row is None:
+            return None
+        return RedshiftDatabase(
+            name=database,
+            type=row[1],
+            options=row[2],
+        )
+
     @staticmethod
     def get_schemas(
         conn: redshift_connector.Connection, database: str
     ) -> List[RedshiftSchema]:
         cursor = RedshiftDataDictionary.get_query_result(
             conn,
-            RedshiftCommonQuery.list_schemas.format(database_name=database),
+            RedshiftCommonQuery.list_schemas(database),
         )
 
         schemas = cursor.fetchall()
@@ -158,8 +279,8 @@ class RedshiftDataDictionary:
                 database=database,
                 name=schema[field_names.index("schema_name")],
                 type=schema[field_names.index("schema_type")],
-                owner=schema[field_names.index("schema_owner_name")],
                 option=schema[field_names.index("schema_option")],
+                external_platform=schema[field_names.index("external_platform")],
                 external_database=schema[field_names.index("external_database")],
             )
             for schema in schemas
@@ -203,6 +324,7 @@ class RedshiftDataDictionary:
         self,
         conn: redshift_connector.Connection,
         skip_external_tables: bool = False,
+        is_shared_database: bool = False,
     ) -> Tuple[Dict[str, List[RedshiftTable]], Dict[str, List[RedshiftView]]]:
         tables: Dict[str, List[RedshiftTable]] = {}
         views: Dict[str, List[RedshiftView]] = {}
@@ -213,7 +335,10 @@
 
         cur = RedshiftDataDictionary.get_query_result(
             conn,
-            RedshiftCommonQuery.list_tables(skip_external_tables=skip_external_tables),
+            RedshiftCommonQuery.list_tables(
+                skip_external_tables=skip_external_tables,
+                is_shared_database=is_shared_database,
+            ),
         )
         field_names = [i[0] for i in cur.description]
         db_tables = cur.fetchall()
@@ -358,11 +483,15 @@
 
     @staticmethod
     def get_columns_for_schema(
-        conn: redshift_connector.Connection, schema: RedshiftSchema
+        conn: redshift_connector.Connection,
+        schema: RedshiftSchema,
+        is_shared_database: bool = False,
    ) -> Dict[str, List[RedshiftColumn]]:
         cursor = RedshiftDataDictionary.get_query_result(
             conn,
-            RedshiftCommonQuery.list_columns.format(schema_name=schema.name),
+            RedshiftCommonQuery.list_columns(
+                is_shared_database=is_shared_database
+            ).format(schema_name=schema.name),
         )
 
         table_columns: Dict[str, List[RedshiftColumn]] = {}
@@ -508,3 +637,34 @@
                 start_time=row[field_names.index("start_time")],
             )
             rows = cursor.fetchmany()
+
+    @staticmethod
+    def get_outbound_datashares(
+        conn: redshift_connector.Connection,
+    ) -> Iterable[OutboundDatashare]:
+        cursor = conn.cursor()
+        cursor.execute(RedshiftCommonQuery.list_outbound_datashares())
+        for item in cursor.fetchall():
+            yield OutboundDatashare(
+                share_name=item[1],
+                producer_namespace=item[2],
+                source_database=item[3],
+            )
+
+    # NOTE: this is not used right now as it requires superuser privilege
+    # We can use this in future if the permissions are lowered.
+    @staticmethod
+    def get_inbound_datashare(
+        conn: redshift_connector.Connection,
+        database: str,
+    ) -> Optional[InboundDatashare]:
+        cursor = conn.cursor()
+        cursor.execute(RedshiftCommonQuery.get_inbound_datashare(database))
+        item = cursor.fetchone()
+        if item:
+            return InboundDatashare(
+                share_name=item[1],
+                producer_namespace=item[2],
+                consumer_database=item[3],
+            )
+        return None
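
As a rough sketch of how these new lookups could fit together (illustrative only; it does not reproduce the new datashares.py module, and the connection details are placeholders):

import redshift_connector

from datahub.ingestion.source.redshift.redshift_schema import (
    InboundDatashare,
    RedshiftDataDictionary,
)

conn = redshift_connector.connect(
    host="redshift-host", database="dev", user="user", password="password"
)

# Index every outbound share on the cluster by "<producer_namespace>.<share_name>".
outbound_by_key = {
    share.get_key(): share
    for share in RedshiftDataDictionary.get_outbound_datashares(conn)
}

# For a shared (consumer) database, resolve its inbound share and match it to the
# producer side via the shared key. PartialInboundDatashare has no full key, so it
# would need prefix matching instead.
db = RedshiftDataDictionary.get_database_details(conn, "consumer_db")
if db is not None and db.is_shared_database:
    share = db.get_inbound_share()
    if isinstance(share, InboundDatashare) and share.get_key() in outbound_by_key:
        producer = outbound_by_key[share.get_key()]
        print(f"{db.name} is consumed from {producer.source_database}")
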

datahub/ingestion/source/redshift/report.py

@@ -60,5 +60,8 @@ class RedshiftReport(
     sql_aggregator: Optional[SqlAggregatorReport] = None
     lineage_phases_timer: Dict[str, PerfTimer] = field(default_factory=dict)
 
+    is_shared_database: bool = False
+    outbound_shares_count: Optional[int] = None
+
     def report_dropped(self, key: str) -> None:
         self.filtered.append(key)

datahub/ingestion/source/sql/mssql/job_models.py

@@ -11,12 +11,17 @@ from datahub.emitter.mcp_builder import (
     DatabaseKey,
     SchemaKey,
 )
+from datahub.ingestion.source.common.subtypes import (
+    FlowContainerSubTypes,
+    JobContainerSubTypes,
+)
 from datahub.metadata.schema_classes import (
     ContainerClass,
     DataFlowInfoClass,
     DataJobInfoClass,
     DataJobInputOutputClass,
     DataPlatformInstanceClass,
+    SubTypesClass,
 )
 
 
@@ -211,6 +216,18 @@ class MSSQLDataJob:
             status=self.status,
         )
 
+    @property
+    def as_subtypes_aspect(self) -> SubTypesClass:
+        assert isinstance(self.entity, (JobStep, StoredProcedure))
+        type = (
+            JobContainerSubTypes.MSSQL_JOBSTEP
+            if isinstance(self.entity, JobStep)
+            else JobContainerSubTypes.MSSQL_STORED_PROCEDURE
+        )
+        return SubTypesClass(
+            typeNames=[type],
+        )
+
     @property
     def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
         if self.entity.flow.platform_instance:
@@ -276,6 +293,18 @@ class MSSQLDataFlow:
             externalUrl=self.external_url,
         )
 
+    @property
+    def as_subtypes_aspect(self) -> SubTypesClass:
+        assert isinstance(self.entity, (MSSQLJob, MSSQLProceduresContainer))
+        type = (
+            FlowContainerSubTypes.MSSQL_JOB
+            if isinstance(self.entity, MSSQLJob)
+            else FlowContainerSubTypes.MSSQL_PROCEDURE_CONTAINER
+        )
+        return SubTypesClass(
+            typeNames=[type],
+        )
+
     @property
     def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
         if self.entity.platform_instance:

datahub/ingestion/source/sql/mssql/source.py

@@ -638,6 +638,11 @@ class SQLServerSource(SQLAlchemySource):
             aspect=data_job.as_datajob_info_aspect,
         ).as_workunit()
 
+        yield MetadataChangeProposalWrapper(
+            entityUrn=data_job.urn,
+            aspect=data_job.as_subtypes_aspect,
+        ).as_workunit()
+
         data_platform_instance_aspect = data_job.as_maybe_platform_instance_aspect
         if data_platform_instance_aspect:
             yield MetadataChangeProposalWrapper(
@@ -676,8 +681,6 @@ class SQLServerSource(SQLAlchemySource):
             ),
         ).as_workunit()
 
-        # TODO: Add SubType when it appear
-
     def construct_flow_workunits(
         self,
         data_flow: MSSQLDataFlow,
@@ -687,6 +690,11 @@ class SQLServerSource(SQLAlchemySource):
             aspect=data_flow.as_dataflow_info_aspect,
         ).as_workunit()
 
+        yield MetadataChangeProposalWrapper(
+            entityUrn=data_flow.urn,
+            aspect=data_flow.as_subtypes_aspect,
+        ).as_workunit()
+
         data_platform_instance_aspect = data_flow.as_maybe_platform_instance_aspect
         if data_platform_instance_aspect:
             yield MetadataChangeProposalWrapper(
@@ -700,8 +708,6 @@ class SQLServerSource(SQLAlchemySource):
             aspect=data_flow.as_container_aspect,
         ).as_workunit()
 
-        # TODO: Add SubType when it appear
-
     def get_inspectors(self) -> Iterable[Inspector]:
         # This method can be overridden in the case that you want to dynamically
         # run on multiple databases.

datahub/ingestion/source/sql/oracle.py

@@ -152,6 +152,7 @@ class OracleInspectorObjectWrapper:
         self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX")
 
     def get_db_name(self) -> str:
+        db_name = None
         try:
             # Try to retrieve current DB name by executing query
             db_name = self._inspector_instance.bind.execute(
@@ -159,7 +160,12 @@
             ).scalar()
             return str(db_name)
         except sqlalchemy.exc.DatabaseError as e:
-            logger.error("Error fetching DB name: " + str(e))
+            self.report.failure(
+                title="Error fetching database name using sys_context.",
+                message="database_fetch_error",
+                context=db_name,
+                exc=e,
+            )
             return ""
 
     def get_schema_names(self) -> List[str]:
@@ -326,8 +332,8 @@
             try:
                 coltype = ischema_names[coltype]()
             except KeyError:
-                logger.warning(
-                    f"Did not recognize type {coltype} of column {colname}"
+                logger.info(
+                    f"Unrecognized column datatype {coltype} of column {colname}"
                 )
                 coltype = sqltypes.NULLTYPE
 
@@ -379,8 +385,8 @@
         COMMENT_SQL = """
             SELECT comments
             FROM dba_tab_comments
-            WHERE table_name = CAST(:table_name AS VARCHAR(128))
-            AND owner = CAST(:schema_name AS VARCHAR(128))
+            WHERE table_name = :table_name
+            AND owner = :schema_name
         """
 
         c = self._inspector_instance.bind.execute(
@@ -397,79 +403,93 @@
 
         text = (
             "SELECT"
-            "\nac.constraint_name,"  # 0
-            "\nac.constraint_type,"  # 1
-            "\nloc.column_name AS local_column,"  # 2
-            "\nrem.table_name AS remote_table,"  # 3
-            "\nrem.column_name AS remote_column,"  # 4
-            "\nrem.owner AS remote_owner,"  # 5
-            "\nloc.position as loc_pos,"  # 6
-            "\nrem.position as rem_pos,"  # 7
-            "\nac.search_condition,"  # 8
-            "\nac.delete_rule"  # 9
-            "\nFROM dba_constraints%(dblink)s ac,"
-            "\ndba_cons_columns%(dblink)s loc,"
-            "\ndba_cons_columns%(dblink)s rem"
-            "\nWHERE ac.table_name = CAST(:table_name AS VARCHAR2(128))"
-            "\nAND ac.constraint_type IN ('R','P', 'U', 'C')"
+            "\nac.constraint_name,"
+            "\nac.constraint_type,"
+            "\nacc.column_name AS local_column,"
+            "\nNULL AS remote_table,"
+            "\nNULL AS remote_column,"
+            "\nNULL AS remote_owner,"
+            "\nacc.position AS loc_pos,"
+            "\nNULL AS rem_pos,"
+            "\nac.search_condition,"
+            "\nac.delete_rule"
+            "\nFROM dba_constraints ac"
+            "\nJOIN dba_cons_columns acc"
+            "\nON ac.owner = acc.owner"
+            "\nAND ac.constraint_name = acc.constraint_name"
+            "\nAND ac.table_name = acc.table_name"
+            "\nWHERE ac.table_name = :table_name"
+            "\nAND ac.constraint_type IN ('P', 'U', 'C')"
         )
 
         if schema is not None:
             params["owner"] = schema
-            text += "\nAND ac.owner = CAST(:owner AS VARCHAR2(128))"
+            text += "\nAND ac.owner = :owner"
 
+        # Splitting into queries with UNION ALL for execution efficiency
         text += (
-            "\nAND ac.owner = loc.owner"
-            "\nAND ac.constraint_name = loc.constraint_name"
-            "\nAND ac.r_owner = rem.owner(+)"
-            "\nAND ac.r_constraint_name = rem.constraint_name(+)"
-            "\nAND (rem.position IS NULL or loc.position=rem.position)"
-            "\nORDER BY ac.constraint_name, loc.position"
+            "\nUNION ALL"
+            "\nSELECT"
+            "\nac.constraint_name,"
+            "\nac.constraint_type,"
+            "\nacc.column_name AS local_column,"
+            "\nac.r_table_name AS remote_table,"
+            "\nrcc.column_name AS remote_column,"
+            "\nac.r_owner AS remote_owner,"
+            "\nacc.position AS loc_pos,"
+            "\nrcc.position AS rem_pos,"
+            "\nac.search_condition,"
+            "\nac.delete_rule"
+            "\nFROM dba_constraints ac"
+            "\nJOIN dba_cons_columns acc"
+            "\nON ac.owner = acc.owner"
+            "\nAND ac.constraint_name = acc.constraint_name"
+            "\nAND ac.table_name = acc.table_name"
+            "\nLEFT JOIN dba_cons_columns rcc"
+            "\nON ac.r_owner = rcc.owner"
+            "\nAND ac.r_constraint_name = rcc.constraint_name"
+            "\nAND acc.position = rcc.position"
+            "\nWHERE ac.table_name = :table_name"
+            "\nAND ac.constraint_type = 'R'"
         )
 
-        text = text % {"dblink": dblink}
+        if schema is not None:
+            text += "\nAND ac.owner = :owner"
+
+        text += "\nORDER BY constraint_name, loc_pos"
+
         rp = self._inspector_instance.bind.execute(sql.text(text), params)
-        constraint_data = rp.fetchall()
-        return constraint_data
+        return rp.fetchall()
 
     def get_pk_constraint(
         self, table_name: str, schema: Optional[str] = None, dblink: str = ""
     ) -> Dict:
-        denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
-            table_name
-        )
-        assert denormalized_table_name
-
-        schema = self._inspector_instance.dialect.denormalize_name(
-            schema or self.default_schema_name
-        )
-
-        if schema is None:
-            schema = self._inspector_instance.dialect.default_schema_name
-
         pkeys = []
         constraint_name = None
-        constraint_data = self._get_constraint_data(
-            denormalized_table_name, schema, dblink
-        )
 
-        for row in constraint_data:
-            (
-                cons_name,
-                cons_type,
-                local_column,
-                remote_table,
-                remote_column,
-                remote_owner,
-            ) = row[0:2] + tuple(
-                [self._inspector_instance.dialect.normalize_name(x) for x in row[2:6]]
+        try:
+            for row in self._get_constraint_data(table_name, schema, dblink):
+                if row[1] == "P":  # constraint_type is 'P' for primary key
+                    if constraint_name is None:
+                        constraint_name = (
+                            self._inspector_instance.dialect.normalize_name(row[0])
+                        )
+                    col_name = self._inspector_instance.dialect.normalize_name(
+                        row[2]
+                    )  # local_column
+                    pkeys.append(col_name)
+        except Exception as e:
+            self.report.warning(
+                title="Failed to Process Primary Keys",
+                message=(
+                    f"Unable to process primary key constraints for {schema}.{table_name}. "
+                    "Ensure SELECT access on DBA_CONSTRAINTS and DBA_CONS_COLUMNS.",
+                ),
+                context=f"{schema}.{table_name}",
+                exc=e,
             )
-            if cons_type == "P":
-                if constraint_name is None:
-                    constraint_name = self._inspector_instance.dialect.normalize_name(
-                        cons_name
-                    )
-                pkeys.append(local_column)
+            # Return empty constraint if we can't process it
+            return {"constrained_columns": [], "name": None}
 
         return {"constrained_columns": pkeys, "name": constraint_name}
 
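For reference, a small sketch of the row layout the rewritten query produces and what get_pk_constraint() now returns for it (the constraint and column names below are made up):

# Column order in each row returned by _get_constraint_data():
# (constraint_name, constraint_type, local_column, remote_table, remote_column,
#  remote_owner, loc_pos, rem_pos, search_condition, delete_rule)
row = ("SYS_C0012345", "P", "ORDER_ID", None, None, None, 1, None, None, None)

# get_pk_constraint() keeps rows with constraint_type == "P" and normalizes the
# identifiers, so for a table whose only PK column is ORDER_ID it would return:
expected = {"constrained_columns": ["order_id"], "name": "sys_c0012345"}
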
@@ -527,6 +547,16 @@
                     f"dba_cons_columns{dblink} - does the user have "
                     "proper rights to the table?"
                 )
+                self.report.warning(
+                    title="Missing Table Permissions",
+                    message=(
+                        f"Unable to query table_name from dba_cons_columns{dblink}. "
+                        "This usually indicates insufficient permissions on the target table. "
+                        f"Foreign key relationships will not be detected for {schema}.{table_name}. "
+                        "Please ensure the user has SELECT privileges on dba_cons_columns."
+                    ),
+                    context=f"{schema}.{table_name}",
+                )
 
             rec = fkeys[cons_name]
             rec["name"] = cons_name
@@ -573,8 +603,8 @@
         text = "SELECT text FROM dba_views WHERE view_name=:view_name"
 
         if schema is not None:
-            text += " AND owner = :schema"
-            params["schema"] = schema
+            params["owner"] = schema
+            text += "\nAND owner = :owner"
 
         rp = self._inspector_instance.bind.execute(sql.text(text), params).scalar()
 

datahub/metadata/_schema_classes.py

@@ -15486,7 +15486,7 @@ class DomainKeyClass(_Aspect):
 
 
     ASPECT_NAME = 'domainKey'
-    ASPECT_INFO = {'keyForEntity': 'domain', 'entityCategory': 'core', 'entityAspects': ['domainProperties', 'institutionalMemory', 'ownership', 'structuredProperties', 'forms', 'testResults'], 'entityDoc': 'A data domain within an organization.'}
+    ASPECT_INFO = {'keyForEntity': 'domain', 'entityCategory': 'core', 'entityAspects': ['domainProperties', 'institutionalMemory', 'ownership', 'structuredProperties', 'forms', 'testResults', 'displayProperties'], 'entityDoc': 'A data domain within an organization.'}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DomainKey")
 
     def __init__(self,
@@ -15631,7 +15631,7 @@ class GlossaryNodeKeyClass(_Aspect):
 
 
     ASPECT_NAME = 'glossaryNodeKey'
-    ASPECT_INFO = {'keyForEntity': 'glossaryNode', 'entityCategory': 'core', 'entityAspects': ['glossaryNodeInfo', 'institutionalMemory', 'ownership', 'status', 'structuredProperties', 'forms', 'testResults', 'subTypes']}
+    ASPECT_INFO = {'keyForEntity': 'glossaryNode', 'entityCategory': 'core', 'entityAspects': ['glossaryNodeInfo', 'institutionalMemory', 'ownership', 'status', 'structuredProperties', 'forms', 'testResults', 'subTypes', 'displayProperties']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.GlossaryNodeKey")
 
     def __init__(self,
@@ -15831,7 +15831,7 @@ class MLModelDeploymentKeyClass(_Aspect):
 
 
     ASPECT_NAME = 'mlModelDeploymentKey'
-    ASPECT_INFO = {'keyForEntity': 'mlModelDeployment', 'entityCategory': 'core', 'entityAspects': ['mlModelDeploymentProperties', 'ownership', 'status', 'deprecation', 'globalTags', 'dataPlatformInstance', 'testResults']}
+    ASPECT_INFO = {'keyForEntity': 'mlModelDeployment', 'entityCategory': 'core', 'entityAspects': ['mlModelDeploymentProperties', 'ownership', 'status', 'deprecation', 'globalTags', 'dataPlatformInstance', 'testResults', 'container']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelDeploymentKey")
 
     def __init__(self,
@@ -15886,7 +15886,7 @@ class MLModelGroupKeyClass(_Aspect):
 
 
     ASPECT_NAME = 'mlModelGroupKey'
-    ASPECT_INFO = {'keyForEntity': 'mlModelGroup', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelGroupProperties', 'domains', 'mlModelGroupProperties', 'ownership', 'status', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'subTypes']}
+    ASPECT_INFO = {'keyForEntity': 'mlModelGroup', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelGroupProperties', 'domains', 'mlModelGroupProperties', 'ownership', 'status', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'subTypes', 'container']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelGroupKey")
 
     def __init__(self,
@@ -15941,7 +15941,7 @@ class MLModelKeyClass(_Aspect):
 
 
     ASPECT_NAME = 'mlModelKey'
-    ASPECT_INFO = {'keyForEntity': 'mlModel', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelProperties', 'domains', 'ownership', 'mlModelProperties', 'intendedUse', 'mlModelFactorPrompts', 'mlModelMetrics', 'mlModelEvaluationData', 'mlModelTrainingData', 'mlModelQuantitativeAnalyses', 'mlModelEthicalConsiderations', 'mlModelCaveatsAndRecommendations', 'institutionalMemory', 'sourceCode', 'status', 'cost', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'versionProperties', 'subTypes']}
+    ASPECT_INFO = {'keyForEntity': 'mlModel', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelProperties', 'domains', 'ownership', 'mlModelProperties', 'intendedUse', 'mlModelFactorPrompts', 'mlModelMetrics', 'mlModelEvaluationData', 'mlModelTrainingData', 'mlModelQuantitativeAnalyses', 'mlModelEthicalConsiderations', 'mlModelCaveatsAndRecommendations', 'institutionalMemory', 'sourceCode', 'status', 'cost', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'versionProperties', 'subTypes', 'container']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelKey")
 
     def __init__(self,

datahub/metadata/_urns/urn_defs.py

@@ -788,6 +788,18 @@ class ChartUrn(_SpecificUrn):
     def from_key_aspect(cls, key_aspect: "ChartKeyClass") -> "ChartUrn":
         return cls(dashboard_tool=key_aspect.dashboardTool, chart_id=key_aspect.chartId)
 
+    @classmethod
+    def create_from_ids(
+        cls,
+        platform: str,
+        name: str,
+        platform_instance: Optional[str] = None,
+    ) -> "ChartUrn":
+        return ChartUrn(
+            dashboard_tool=platform,
+            chart_id=f"{platform_instance}.{name}" if platform_instance else name,
+        )
+
     @property
     def dashboard_tool(self) -> str:
         return self._entity_ids[0]
@@ -1480,6 +1492,18 @@ class DashboardUrn(_SpecificUrn):
     def from_key_aspect(cls, key_aspect: "DashboardKeyClass") -> "DashboardUrn":
         return cls(dashboard_tool=key_aspect.dashboardTool, dashboard_id=key_aspect.dashboardId)
 
+    @classmethod
+    def create_from_ids(
+        cls,
+        platform: str,
+        name: str,
+        platform_instance: Optional[str] = None,
+    ) -> "DashboardUrn":
+        return DashboardUrn(
+            dashboard_tool=platform,
+            dashboard_id=f"{platform_instance}.{name}" if platform_instance else name,
+        )
+
     @property
     def dashboard_tool(self) -> str:
         return self._entity_ids[0]
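
A short illustration of the new create_from_ids helpers; the platform and asset names are invented, and the printed urns follow from the id composition shown above:

from datahub.metadata.urns import ChartUrn, DashboardUrn

# Without a platform instance the name is used directly as the id.
print(ChartUrn.create_from_ids(platform="looker", name="sales_chart"))
# urn:li:chart:(looker,sales_chart)

# With a platform instance the id becomes "<platform_instance>.<name>".
print(DashboardUrn.create_from_ids(platform="looker", name="sales_dash", platform_instance="prod"))
# urn:li:dashboard:(looker,prod.sales_dash)
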

datahub/metadata/schema.avsc

@@ -15636,7 +15636,8 @@
         "ownership",
         "structuredProperties",
         "forms",
-        "testResults"
+        "testResults",
+        "displayProperties"
       ],
       "entityDoc": "A data domain within an organization."
     },