acryl-datahub 1.0.0rc10__py3-none-any.whl → 1.0.0rc11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/METADATA +2416 -2416
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/RECORD +28 -27
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/ingestion/source/iceberg/iceberg_common.py +40 -1
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +7 -4
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +125 -33
- datahub/ingestion/source/redshift/redshift.py +41 -72
- datahub/ingestion/source/redshift/redshift_schema.py +166 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/sql/oracle.py +93 -63
- datahub/metadata/_schema_classes.py +5 -5
- datahub/metadata/schema.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MLModelKey.avsc +2 -1
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc10.dist-info → acryl_datahub-1.0.0rc11.dist-info}/top_level.txt +0 -0
|
@@ -152,6 +152,7 @@ class OracleInspectorObjectWrapper:
|
|
|
152
152
|
self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX")
|
|
153
153
|
|
|
154
154
|
def get_db_name(self) -> str:
|
|
155
|
+
db_name = None
|
|
155
156
|
try:
|
|
156
157
|
# Try to retrieve current DB name by executing query
|
|
157
158
|
db_name = self._inspector_instance.bind.execute(
|
|
@@ -159,7 +160,12 @@ class OracleInspectorObjectWrapper:
|
|
|
159
160
|
).scalar()
|
|
160
161
|
return str(db_name)
|
|
161
162
|
except sqlalchemy.exc.DatabaseError as e:
|
|
162
|
-
|
|
163
|
+
self.report.failure(
|
|
164
|
+
title="Error fetching database name using sys_context.",
|
|
165
|
+
message="database_fetch_error",
|
|
166
|
+
context=db_name,
|
|
167
|
+
exc=e,
|
|
168
|
+
)
|
|
163
169
|
return ""
|
|
164
170
|
|
|
165
171
|
def get_schema_names(self) -> List[str]:
|
|
@@ -326,8 +332,8 @@ class OracleInspectorObjectWrapper:
|
|
|
326
332
|
try:
|
|
327
333
|
coltype = ischema_names[coltype]()
|
|
328
334
|
except KeyError:
|
|
329
|
-
logger.
|
|
330
|
-
f"
|
|
335
|
+
logger.info(
|
|
336
|
+
f"Unrecognized column datatype {coltype} of column {colname}"
|
|
331
337
|
)
|
|
332
338
|
coltype = sqltypes.NULLTYPE
|
|
333
339
|
|
|
@@ -379,8 +385,8 @@ class OracleInspectorObjectWrapper:
|
|
|
379
385
|
COMMENT_SQL = """
|
|
380
386
|
SELECT comments
|
|
381
387
|
FROM dba_tab_comments
|
|
382
|
-
WHERE table_name =
|
|
383
|
-
AND owner =
|
|
388
|
+
WHERE table_name = :table_name
|
|
389
|
+
AND owner = :schema_name
|
|
384
390
|
"""
|
|
385
391
|
|
|
386
392
|
c = self._inspector_instance.bind.execute(
|
|
@@ -397,79 +403,93 @@ class OracleInspectorObjectWrapper:
|
|
|
397
403
|
|
|
398
404
|
text = (
|
|
399
405
|
"SELECT"
|
|
400
|
-
"\nac.constraint_name,"
|
|
401
|
-
"\nac.constraint_type,"
|
|
402
|
-
"\
|
|
403
|
-
"\
|
|
404
|
-
"\
|
|
405
|
-
"\
|
|
406
|
-
"\
|
|
407
|
-
"\
|
|
408
|
-
"\nac.search_condition,"
|
|
409
|
-
"\nac.delete_rule"
|
|
410
|
-
"\nFROM dba_constraints
|
|
411
|
-
"\
|
|
412
|
-
"\
|
|
413
|
-
"\
|
|
414
|
-
"\nAND ac.
|
|
406
|
+
"\nac.constraint_name,"
|
|
407
|
+
"\nac.constraint_type,"
|
|
408
|
+
"\nacc.column_name AS local_column,"
|
|
409
|
+
"\nNULL AS remote_table,"
|
|
410
|
+
"\nNULL AS remote_column,"
|
|
411
|
+
"\nNULL AS remote_owner,"
|
|
412
|
+
"\nacc.position AS loc_pos,"
|
|
413
|
+
"\nNULL AS rem_pos,"
|
|
414
|
+
"\nac.search_condition,"
|
|
415
|
+
"\nac.delete_rule"
|
|
416
|
+
"\nFROM dba_constraints ac"
|
|
417
|
+
"\nJOIN dba_cons_columns acc"
|
|
418
|
+
"\nON ac.owner = acc.owner"
|
|
419
|
+
"\nAND ac.constraint_name = acc.constraint_name"
|
|
420
|
+
"\nAND ac.table_name = acc.table_name"
|
|
421
|
+
"\nWHERE ac.table_name = :table_name"
|
|
422
|
+
"\nAND ac.constraint_type IN ('P', 'U', 'C')"
|
|
415
423
|
)
|
|
416
424
|
|
|
417
425
|
if schema is not None:
|
|
418
426
|
params["owner"] = schema
|
|
419
|
-
text += "\nAND ac.owner =
|
|
427
|
+
text += "\nAND ac.owner = :owner"
|
|
420
428
|
|
|
429
|
+
# Splitting into queries with UNION ALL for execution efficiency
|
|
421
430
|
text += (
|
|
422
|
-
"\
|
|
423
|
-
"\
|
|
424
|
-
"\
|
|
425
|
-
"\
|
|
426
|
-
"\
|
|
427
|
-
"\
|
|
431
|
+
"\nUNION ALL"
|
|
432
|
+
"\nSELECT"
|
|
433
|
+
"\nac.constraint_name,"
|
|
434
|
+
"\nac.constraint_type,"
|
|
435
|
+
"\nacc.column_name AS local_column,"
|
|
436
|
+
"\nac.r_table_name AS remote_table,"
|
|
437
|
+
"\nrcc.column_name AS remote_column,"
|
|
438
|
+
"\nac.r_owner AS remote_owner,"
|
|
439
|
+
"\nacc.position AS loc_pos,"
|
|
440
|
+
"\nrcc.position AS rem_pos,"
|
|
441
|
+
"\nac.search_condition,"
|
|
442
|
+
"\nac.delete_rule"
|
|
443
|
+
"\nFROM dba_constraints ac"
|
|
444
|
+
"\nJOIN dba_cons_columns acc"
|
|
445
|
+
"\nON ac.owner = acc.owner"
|
|
446
|
+
"\nAND ac.constraint_name = acc.constraint_name"
|
|
447
|
+
"\nAND ac.table_name = acc.table_name"
|
|
448
|
+
"\nLEFT JOIN dba_cons_columns rcc"
|
|
449
|
+
"\nON ac.r_owner = rcc.owner"
|
|
450
|
+
"\nAND ac.r_constraint_name = rcc.constraint_name"
|
|
451
|
+
"\nAND acc.position = rcc.position"
|
|
452
|
+
"\nWHERE ac.table_name = :table_name"
|
|
453
|
+
"\nAND ac.constraint_type = 'R'"
|
|
428
454
|
)
|
|
429
455
|
|
|
430
|
-
|
|
456
|
+
if schema is not None:
|
|
457
|
+
text += "\nAND ac.owner = :owner"
|
|
458
|
+
|
|
459
|
+
text += "\nORDER BY constraint_name, loc_pos"
|
|
460
|
+
|
|
431
461
|
rp = self._inspector_instance.bind.execute(sql.text(text), params)
|
|
432
|
-
|
|
433
|
-
return constraint_data
|
|
462
|
+
return rp.fetchall()
|
|
434
463
|
|
|
435
464
|
def get_pk_constraint(
    self, table_name: str, schema: Optional[str] = None, dblink: str = ""
) -> Dict:
    """Collect the primary-key columns and constraint name for a table.

    Rows come from ``self._get_constraint_data(table_name, schema, dblink)``
    and are read positionally: ``row[0]`` is the constraint name, ``row[1]``
    the constraint type and ``row[2]`` the local column name. Only rows whose
    type is ``"P"`` (primary key) are used. Names are passed through the
    dialect's ``normalize_name`` before being returned.

    Args:
        table_name: Table whose primary key is looked up.
        schema: Owning schema, or ``None``; forwarded unchanged to
            ``_get_constraint_data``.
        dblink: Database link suffix; forwarded unchanged to
            ``_get_constraint_data``.

    Returns:
        ``{"constrained_columns": [...], "name": ...}``. If anything fails
        while fetching or processing the rows, a warning is recorded on
        ``self.report`` and an empty constraint
        (``{"constrained_columns": [], "name": None}``) is returned.
    """
    pkeys = []
    constraint_name = None

    try:
        for row in self._get_constraint_data(table_name, schema, dblink):
            if row[1] != "P":  # constraint_type is 'P' for primary key
                continue
            # All rows of one primary key share the constraint name, so it is
            # only captured from the first matching row.
            if constraint_name is None:
                constraint_name = self._inspector_instance.dialect.normalize_name(
                    row[0]
                )
            col_name = self._inspector_instance.dialect.normalize_name(
                row[2]
            )  # local_column
            pkeys.append(col_name)
    except Exception as e:
        # Best-effort: a failure here must not abort ingestion of the table.
        self.report.warning(
            title="Failed to Process Primary Keys",
            # NOTE: no trailing comma inside the parentheses -- with one, the
            # implicit string concatenation becomes a 1-tuple instead of a str.
            message=(
                f"Unable to process primary key constraints for {schema}.{table_name}. "
                "Ensure SELECT access on DBA_CONSTRAINTS and DBA_CONS_COLUMNS."
            ),
            context=f"{schema}.{table_name}",
            exc=e,
        )
        # Return empty constraint if we can't process it
        return {"constrained_columns": [], "name": None}

    return {"constrained_columns": pkeys, "name": constraint_name}
|
|
475
495
|
|
|
@@ -527,6 +547,16 @@ class OracleInspectorObjectWrapper:
|
|
|
527
547
|
f"dba_cons_columns{dblink} - does the user have "
|
|
528
548
|
"proper rights to the table?"
|
|
529
549
|
)
|
|
550
|
+
self.report.warning(
|
|
551
|
+
title="Missing Table Permissions",
|
|
552
|
+
message=(
|
|
553
|
+
f"Unable to query table_name from dba_cons_columns{dblink}. "
|
|
554
|
+
"This usually indicates insufficient permissions on the target table. "
|
|
555
|
+
f"Foreign key relationships will not be detected for {schema}.{table_name}. "
|
|
556
|
+
"Please ensure the user has SELECT privileges on dba_cons_columns."
|
|
557
|
+
),
|
|
558
|
+
context=f"{schema}.{table_name}",
|
|
559
|
+
)
|
|
530
560
|
|
|
531
561
|
rec = fkeys[cons_name]
|
|
532
562
|
rec["name"] = cons_name
|
|
@@ -573,8 +603,8 @@ class OracleInspectorObjectWrapper:
|
|
|
573
603
|
text = "SELECT text FROM dba_views WHERE view_name=:view_name"
|
|
574
604
|
|
|
575
605
|
if schema is not None:
|
|
576
|
-
|
|
577
|
-
|
|
606
|
+
params["owner"] = schema
|
|
607
|
+
text += "\nAND owner = :owner"
|
|
578
608
|
|
|
579
609
|
rp = self._inspector_instance.bind.execute(sql.text(text), params).scalar()
|
|
580
610
|
|
|
@@ -15486,7 +15486,7 @@ class DomainKeyClass(_Aspect):
|
|
|
15486
15486
|
|
|
15487
15487
|
|
|
15488
15488
|
ASPECT_NAME = 'domainKey'
|
|
15489
|
-
ASPECT_INFO = {'keyForEntity': 'domain', 'entityCategory': 'core', 'entityAspects': ['domainProperties', 'institutionalMemory', 'ownership', 'structuredProperties', 'forms', 'testResults'], 'entityDoc': 'A data domain within an organization.'}
|
|
15489
|
+
ASPECT_INFO = {'keyForEntity': 'domain', 'entityCategory': 'core', 'entityAspects': ['domainProperties', 'institutionalMemory', 'ownership', 'structuredProperties', 'forms', 'testResults', 'displayProperties'], 'entityDoc': 'A data domain within an organization.'}
|
|
15490
15490
|
RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DomainKey")
|
|
15491
15491
|
|
|
15492
15492
|
def __init__(self,
|
|
@@ -15631,7 +15631,7 @@ class GlossaryNodeKeyClass(_Aspect):
|
|
|
15631
15631
|
|
|
15632
15632
|
|
|
15633
15633
|
ASPECT_NAME = 'glossaryNodeKey'
|
|
15634
|
-
ASPECT_INFO = {'keyForEntity': 'glossaryNode', 'entityCategory': 'core', 'entityAspects': ['glossaryNodeInfo', 'institutionalMemory', 'ownership', 'status', 'structuredProperties', 'forms', 'testResults', 'subTypes']}
|
|
15634
|
+
ASPECT_INFO = {'keyForEntity': 'glossaryNode', 'entityCategory': 'core', 'entityAspects': ['glossaryNodeInfo', 'institutionalMemory', 'ownership', 'status', 'structuredProperties', 'forms', 'testResults', 'subTypes', 'displayProperties']}
|
|
15635
15635
|
RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.GlossaryNodeKey")
|
|
15636
15636
|
|
|
15637
15637
|
def __init__(self,
|
|
@@ -15831,7 +15831,7 @@ class MLModelDeploymentKeyClass(_Aspect):
|
|
|
15831
15831
|
|
|
15832
15832
|
|
|
15833
15833
|
ASPECT_NAME = 'mlModelDeploymentKey'
|
|
15834
|
-
ASPECT_INFO = {'keyForEntity': 'mlModelDeployment', 'entityCategory': 'core', 'entityAspects': ['mlModelDeploymentProperties', 'ownership', 'status', 'deprecation', 'globalTags', 'dataPlatformInstance', 'testResults']}
|
|
15834
|
+
ASPECT_INFO = {'keyForEntity': 'mlModelDeployment', 'entityCategory': 'core', 'entityAspects': ['mlModelDeploymentProperties', 'ownership', 'status', 'deprecation', 'globalTags', 'dataPlatformInstance', 'testResults', 'container']}
|
|
15835
15835
|
RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelDeploymentKey")
|
|
15836
15836
|
|
|
15837
15837
|
def __init__(self,
|
|
@@ -15886,7 +15886,7 @@ class MLModelGroupKeyClass(_Aspect):
|
|
|
15886
15886
|
|
|
15887
15887
|
|
|
15888
15888
|
ASPECT_NAME = 'mlModelGroupKey'
|
|
15889
|
-
ASPECT_INFO = {'keyForEntity': 'mlModelGroup', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelGroupProperties', 'domains', 'mlModelGroupProperties', 'ownership', 'status', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'subTypes']}
|
|
15889
|
+
ASPECT_INFO = {'keyForEntity': 'mlModelGroup', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelGroupProperties', 'domains', 'mlModelGroupProperties', 'ownership', 'status', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'subTypes', 'container']}
|
|
15890
15890
|
RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelGroupKey")
|
|
15891
15891
|
|
|
15892
15892
|
def __init__(self,
|
|
@@ -15941,7 +15941,7 @@ class MLModelKeyClass(_Aspect):
|
|
|
15941
15941
|
|
|
15942
15942
|
|
|
15943
15943
|
ASPECT_NAME = 'mlModelKey'
|
|
15944
|
-
ASPECT_INFO = {'keyForEntity': 'mlModel', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelProperties', 'domains', 'ownership', 'mlModelProperties', 'intendedUse', 'mlModelFactorPrompts', 'mlModelMetrics', 'mlModelEvaluationData', 'mlModelTrainingData', 'mlModelQuantitativeAnalyses', 'mlModelEthicalConsiderations', 'mlModelCaveatsAndRecommendations', 'institutionalMemory', 'sourceCode', 'status', 'cost', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'versionProperties', 'subTypes']}
|
|
15944
|
+
ASPECT_INFO = {'keyForEntity': 'mlModel', 'entityCategory': 'core', 'entityAspects': ['glossaryTerms', 'editableMlModelProperties', 'domains', 'ownership', 'mlModelProperties', 'intendedUse', 'mlModelFactorPrompts', 'mlModelMetrics', 'mlModelEvaluationData', 'mlModelTrainingData', 'mlModelQuantitativeAnalyses', 'mlModelEthicalConsiderations', 'mlModelCaveatsAndRecommendations', 'institutionalMemory', 'sourceCode', 'status', 'cost', 'deprecation', 'browsePaths', 'globalTags', 'dataPlatformInstance', 'browsePathsV2', 'structuredProperties', 'forms', 'testResults', 'versionProperties', 'subTypes', 'container']}
|
|
15945
15945
|
RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.MLModelKey")
|
|
15946
15946
|
|
|
15947
15947
|
def __init__(self,
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -15636,7 +15636,8 @@
|
|
|
15636
15636
|
"ownership",
|
|
15637
15637
|
"structuredProperties",
|
|
15638
15638
|
"forms",
|
|
15639
|
-
"testResults"
|
|
15639
|
+
"testResults",
|
|
15640
|
+
"displayProperties"
|
|
15640
15641
|
],
|
|
15641
15642
|
"entityDoc": "A data domain within an organization."
|
|
15642
15643
|
},
|
|
@@ -24,12 +24,19 @@ DIALECTS_WITH_CASE_INSENSITIVE_COLS = {
|
|
|
24
24
|
# For SQL server, the default collation rules mean that all identifiers (schema, table, column names)
|
|
25
25
|
# are case preserving but case insensitive.
|
|
26
26
|
"mssql",
|
|
27
|
+
# Oracle automatically converts unquoted identifiers to uppercase.
|
|
28
|
+
# https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Database-Object-Names-and-Qualifiers.html#GUID-3C59E44A-5140-4BCA-B9E1-3039C8050C49
|
|
29
|
+
# In our Oracle connector, we then normalize column names to lowercase. This behavior
|
|
30
|
+
# actually comes from the underlying Oracle sqlalchemy dialect.
|
|
31
|
+
# https://github.com/sqlalchemy/sqlalchemy/blob/d9b4d8ff3aae504402d324f3ebf0b8faff78f5dc/lib/sqlalchemy/dialects/oracle/base.py#L2579
|
|
32
|
+
"oracle",
|
|
27
33
|
}
|
|
28
34
|
DIALECTS_WITH_DEFAULT_UPPERCASE_COLS = {
|
|
29
35
|
# In some dialects, column identifiers are effectively case insensitive
|
|
30
36
|
# because they are automatically converted to uppercase. Most other systems
|
|
31
37
|
# automatically lowercase unquoted identifiers.
|
|
32
38
|
"snowflake",
|
|
39
|
+
"oracle",
|
|
33
40
|
}
|
|
34
41
|
assert DIALECTS_WITH_DEFAULT_UPPERCASE_COLS.issubset(
|
|
35
42
|
DIALECTS_WITH_CASE_INSENSITIVE_COLS
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|