acryl-datahub 0.15.0.6rc3__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2552 -2523
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +204 -191
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +1 -1
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +3 -5
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +3 -3
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +6 -12
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +7 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +251 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +29 -5
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +20 -13
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
import logging
|
|
3
|
+
import platform
|
|
3
4
|
import re
|
|
4
5
|
|
|
5
6
|
# This import verifies that the dependencies are available.
|
|
@@ -85,6 +86,16 @@ class OracleConfig(BasicSQLAlchemyConfig):
|
|
|
85
86
|
description="The data dictionary views mode, to extract information about schema objects "
|
|
86
87
|
"('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)",
|
|
87
88
|
)
|
|
89
|
+
# oracledb settings to enable thick mode and client library location
|
|
90
|
+
enable_thick_mode: Optional[bool] = Field(
|
|
91
|
+
default=False,
|
|
92
|
+
description="Connection defaults to thin mode. Set to True to enable thick mode.",
|
|
93
|
+
)
|
|
94
|
+
thick_mode_lib_dir: Optional[str] = Field(
|
|
95
|
+
default=None,
|
|
96
|
+
description="If using thick mode on Windows or Mac, set thick_mode_lib_dir to the oracle client libraries path. "
|
|
97
|
+
"On Linux, this value is ignored, as ldconfig or LD_LIBRARY_PATH will define the location.",
|
|
98
|
+
)
|
|
88
99
|
|
|
89
100
|
@pydantic.validator("service_name")
|
|
90
101
|
def check_service_name(cls, v, values):
|
|
@@ -100,6 +111,18 @@ class OracleConfig(BasicSQLAlchemyConfig):
|
|
|
100
111
|
raise ValueError("Specify one of data dictionary views mode: 'ALL', 'DBA'.")
|
|
101
112
|
return values
|
|
102
113
|
|
|
114
|
+
@pydantic.validator("thick_mode_lib_dir", always=True)
def check_thick_mode_lib_dir(cls, v, values):
    """Require ``thick_mode_lib_dir`` when thick mode is enabled on macOS/Windows.

    Args:
        v: The configured ``thick_mode_lib_dir`` value (may be ``None``).
        values: Previously validated fields; ``enable_thick_mode`` is read
            from here.

    Returns:
        ``v`` unchanged when the configuration is acceptable.

    Raises:
        ValueError: If thick mode is enabled, no library directory was
            given, and ``platform.system()`` reports "Darwin" or "Windows".
    """
    # Nothing to enforce when a path was supplied or thick mode is off.
    if v is not None or not values.get("enable_thick_mode"):
        return v

    # Only these two platforms need an explicit client library location.
    if platform.system() in ("Darwin", "Windows"):
        raise ValueError(
            "Specify 'thick_mode_lib_dir' on Mac/Windows when enable_thick_mode is true"
        )
    return v
|
|
125
|
+
|
|
103
126
|
def get_sql_alchemy_url(self):
|
|
104
127
|
url = super().get_sql_alchemy_url()
|
|
105
128
|
if self.service_name:
|
|
@@ -129,6 +152,7 @@ class OracleInspectorObjectWrapper:
|
|
|
129
152
|
self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX")
|
|
130
153
|
|
|
131
154
|
def get_db_name(self) -> str:
|
|
155
|
+
db_name = None
|
|
132
156
|
try:
|
|
133
157
|
# Try to retrieve current DB name by executing query
|
|
134
158
|
db_name = self._inspector_instance.bind.execute(
|
|
@@ -136,7 +160,12 @@ class OracleInspectorObjectWrapper:
|
|
|
136
160
|
).scalar()
|
|
137
161
|
return str(db_name)
|
|
138
162
|
except sqlalchemy.exc.DatabaseError as e:
|
|
139
|
-
|
|
163
|
+
self.report.failure(
|
|
164
|
+
title="Error fetching database name using sys_context.",
|
|
165
|
+
message="database_fetch_error",
|
|
166
|
+
context=db_name,
|
|
167
|
+
exc=e,
|
|
168
|
+
)
|
|
140
169
|
return ""
|
|
141
170
|
|
|
142
171
|
def get_schema_names(self) -> List[str]:
|
|
@@ -303,8 +332,8 @@ class OracleInspectorObjectWrapper:
|
|
|
303
332
|
try:
|
|
304
333
|
coltype = ischema_names[coltype]()
|
|
305
334
|
except KeyError:
|
|
306
|
-
logger.
|
|
307
|
-
f"
|
|
335
|
+
logger.info(
|
|
336
|
+
f"Unrecognized column datatype {coltype} of column {colname}"
|
|
308
337
|
)
|
|
309
338
|
coltype = sqltypes.NULLTYPE
|
|
310
339
|
|
|
@@ -356,8 +385,8 @@ class OracleInspectorObjectWrapper:
|
|
|
356
385
|
COMMENT_SQL = """
|
|
357
386
|
SELECT comments
|
|
358
387
|
FROM dba_tab_comments
|
|
359
|
-
WHERE table_name =
|
|
360
|
-
AND owner =
|
|
388
|
+
WHERE table_name = :table_name
|
|
389
|
+
AND owner = :schema_name
|
|
361
390
|
"""
|
|
362
391
|
|
|
363
392
|
c = self._inspector_instance.bind.execute(
|
|
@@ -374,79 +403,93 @@ class OracleInspectorObjectWrapper:
|
|
|
374
403
|
|
|
375
404
|
text = (
|
|
376
405
|
"SELECT"
|
|
377
|
-
"\nac.constraint_name,"
|
|
378
|
-
"\nac.constraint_type,"
|
|
379
|
-
"\
|
|
380
|
-
"\
|
|
381
|
-
"\
|
|
382
|
-
"\
|
|
383
|
-
"\
|
|
384
|
-
"\
|
|
385
|
-
"\nac.search_condition,"
|
|
386
|
-
"\nac.delete_rule"
|
|
387
|
-
"\nFROM dba_constraints
|
|
388
|
-
"\
|
|
389
|
-
"\
|
|
390
|
-
"\
|
|
391
|
-
"\nAND ac.
|
|
406
|
+
"\nac.constraint_name,"
|
|
407
|
+
"\nac.constraint_type,"
|
|
408
|
+
"\nacc.column_name AS local_column,"
|
|
409
|
+
"\nNULL AS remote_table,"
|
|
410
|
+
"\nNULL AS remote_column,"
|
|
411
|
+
"\nNULL AS remote_owner,"
|
|
412
|
+
"\nacc.position AS loc_pos,"
|
|
413
|
+
"\nNULL AS rem_pos,"
|
|
414
|
+
"\nac.search_condition,"
|
|
415
|
+
"\nac.delete_rule"
|
|
416
|
+
"\nFROM dba_constraints ac"
|
|
417
|
+
"\nJOIN dba_cons_columns acc"
|
|
418
|
+
"\nON ac.owner = acc.owner"
|
|
419
|
+
"\nAND ac.constraint_name = acc.constraint_name"
|
|
420
|
+
"\nAND ac.table_name = acc.table_name"
|
|
421
|
+
"\nWHERE ac.table_name = :table_name"
|
|
422
|
+
"\nAND ac.constraint_type IN ('P', 'U', 'C')"
|
|
392
423
|
)
|
|
393
424
|
|
|
394
425
|
if schema is not None:
|
|
395
426
|
params["owner"] = schema
|
|
396
|
-
text += "\nAND ac.owner =
|
|
427
|
+
text += "\nAND ac.owner = :owner"
|
|
397
428
|
|
|
429
|
+
# Splitting into queries with UNION ALL for execution efficiency
|
|
398
430
|
text += (
|
|
399
|
-
"\
|
|
400
|
-
"\
|
|
401
|
-
"\
|
|
402
|
-
"\
|
|
403
|
-
"\
|
|
404
|
-
"\
|
|
431
|
+
"\nUNION ALL"
|
|
432
|
+
"\nSELECT"
|
|
433
|
+
"\nac.constraint_name,"
|
|
434
|
+
"\nac.constraint_type,"
|
|
435
|
+
"\nacc.column_name AS local_column,"
|
|
436
|
+
"\nac.r_table_name AS remote_table,"
|
|
437
|
+
"\nrcc.column_name AS remote_column,"
|
|
438
|
+
"\nac.r_owner AS remote_owner,"
|
|
439
|
+
"\nacc.position AS loc_pos,"
|
|
440
|
+
"\nrcc.position AS rem_pos,"
|
|
441
|
+
"\nac.search_condition,"
|
|
442
|
+
"\nac.delete_rule"
|
|
443
|
+
"\nFROM dba_constraints ac"
|
|
444
|
+
"\nJOIN dba_cons_columns acc"
|
|
445
|
+
"\nON ac.owner = acc.owner"
|
|
446
|
+
"\nAND ac.constraint_name = acc.constraint_name"
|
|
447
|
+
"\nAND ac.table_name = acc.table_name"
|
|
448
|
+
"\nLEFT JOIN dba_cons_columns rcc"
|
|
449
|
+
"\nON ac.r_owner = rcc.owner"
|
|
450
|
+
"\nAND ac.r_constraint_name = rcc.constraint_name"
|
|
451
|
+
"\nAND acc.position = rcc.position"
|
|
452
|
+
"\nWHERE ac.table_name = :table_name"
|
|
453
|
+
"\nAND ac.constraint_type = 'R'"
|
|
405
454
|
)
|
|
406
455
|
|
|
407
|
-
|
|
456
|
+
if schema is not None:
|
|
457
|
+
text += "\nAND ac.owner = :owner"
|
|
458
|
+
|
|
459
|
+
text += "\nORDER BY constraint_name, loc_pos"
|
|
460
|
+
|
|
408
461
|
rp = self._inspector_instance.bind.execute(sql.text(text), params)
|
|
409
|
-
|
|
410
|
-
return constraint_data
|
|
462
|
+
return rp.fetchall()
|
|
411
463
|
|
|
412
464
|
def get_pk_constraint(
    self, table_name: str, schema: Optional[str] = None, dblink: str = ""
) -> Dict:
    """Return the primary-key constraint of a table.

    Rows returned by ``self._get_constraint_data`` are read positionally:
    index 0 is the constraint name, index 1 the constraint type and
    index 2 the local column name. Only rows whose type is ``"P"`` are used.

    Args:
        table_name: Name of the table to inspect.
        schema: Optional owner/schema, forwarded to ``_get_constraint_data``.
        dblink: Optional database link suffix, forwarded unchanged.

    Returns:
        A dict with ``"constrained_columns"`` (normalized column names in the
        order the rows were returned) and ``"name"`` (the normalized name of
        the first primary-key constraint seen, or ``None``). If fetching or
        processing the rows raises, a warning is reported and an empty
        constraint (``[]`` / ``None``) is returned instead.
    """
    normalize = self._inspector_instance.dialect.normalize_name
    pkeys = []
    constraint_name = None

    try:
        for row in self._get_constraint_data(table_name, schema, dblink):
            if row[1] != "P":  # constraint_type is 'P' for primary key
                continue
            if constraint_name is None:
                constraint_name = normalize(row[0])
            pkeys.append(normalize(row[2]))  # local_column
    except Exception as e:
        # Best effort: a failure here must not abort ingestion of the table.
        self.report.warning(
            title="Failed to Process Primary Keys",
            # Implicit string concatenation only -- a trailing comma inside
            # these parentheses would turn the message into a 1-tuple.
            message=(
                f"Unable to process primary key constraints for {schema}.{table_name}. "
                "Ensure SELECT access on DBA_CONSTRAINTS and DBA_CONS_COLUMNS."
            ),
            context=f"{schema}.{table_name}",
            exc=e,
        )
        # Return empty constraint if we can't process it
        return {"constrained_columns": [], "name": None}

    return {"constrained_columns": pkeys, "name": constraint_name}
|
|
452
495
|
|
|
@@ -504,6 +547,16 @@ class OracleInspectorObjectWrapper:
|
|
|
504
547
|
f"dba_cons_columns{dblink} - does the user have "
|
|
505
548
|
"proper rights to the table?"
|
|
506
549
|
)
|
|
550
|
+
self.report.warning(
|
|
551
|
+
title="Missing Table Permissions",
|
|
552
|
+
message=(
|
|
553
|
+
f"Unable to query table_name from dba_cons_columns{dblink}. "
|
|
554
|
+
"This usually indicates insufficient permissions on the target table. "
|
|
555
|
+
f"Foreign key relationships will not be detected for {schema}.{table_name}. "
|
|
556
|
+
"Please ensure the user has SELECT privileges on dba_cons_columns."
|
|
557
|
+
),
|
|
558
|
+
context=f"{schema}.{table_name}",
|
|
559
|
+
)
|
|
507
560
|
|
|
508
561
|
rec = fkeys[cons_name]
|
|
509
562
|
rec["name"] = cons_name
|
|
@@ -550,8 +603,8 @@ class OracleInspectorObjectWrapper:
|
|
|
550
603
|
text = "SELECT text FROM dba_views WHERE view_name=:view_name"
|
|
551
604
|
|
|
552
605
|
if schema is not None:
|
|
553
|
-
|
|
554
|
-
|
|
606
|
+
params["owner"] = schema
|
|
607
|
+
text += "\nAND owner = :owner"
|
|
555
608
|
|
|
556
609
|
rp = self._inspector_instance.bind.execute(sql.text(text), params).scalar()
|
|
557
610
|
|
|
@@ -586,6 +639,17 @@ class OracleSource(SQLAlchemySource):
|
|
|
586
639
|
def __init__(self, config, ctx):
    """Set up the Oracle source and, if requested, switch oracledb to thick mode.

    Args:
        config: Source configuration; ``enable_thick_mode`` and
            ``thick_mode_lib_dir`` are read from ``self.config`` after the
            base initializer has run.
        ctx: Pipeline context, passed through to the base class.
    """
    super().__init__(config, ctx, "oracle")

    # Thick mode has to be initialized before create_engine is called, which
    # happens in get_inspectors().
    # https://python-oracledb.readthedocs.io/en/latest/user_guide/initialization.html#enabling-python-oracledb-thick-mode
    if not self.config.enable_thick_mode:
        return

    if platform.system() in ("Darwin", "Windows"):
        # Windows and macOS require lib_dir to be set explicitly.
        oracledb.init_oracle_client(lib_dir=self.config.thick_mode_lib_dir)
    else:
        # Linux requires configuring the library path with ldconfig or
        # LD_LIBRARY_PATH, so no lib_dir is passed.
        oracledb.init_oracle_client()
|
|
652
|
+
|
|
589
653
|
@classmethod
|
|
590
654
|
def create(cls, config_dict, ctx):
|
|
591
655
|
config = OracleConfig.parse_obj(config_dict)
|
|
@@ -204,7 +204,7 @@ def get_column_type(
|
|
|
204
204
|
"""
|
|
205
205
|
|
|
206
206
|
TypeClass: Optional[Type] = None
|
|
207
|
-
for sql_type in _field_type_mapping
|
|
207
|
+
for sql_type in _field_type_mapping:
|
|
208
208
|
if isinstance(column_type, sql_type):
|
|
209
209
|
TypeClass = _field_type_mapping[sql_type]
|
|
210
210
|
break
|
|
@@ -635,7 +635,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
635
635
|
|
|
636
636
|
return None
|
|
637
637
|
|
|
638
|
-
def loop_tables(
|
|
638
|
+
def loop_tables(
|
|
639
639
|
self,
|
|
640
640
|
inspector: Inspector,
|
|
641
641
|
schema: str,
|
|
@@ -973,7 +973,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
973
973
|
inspector=inspector,
|
|
974
974
|
)
|
|
975
975
|
),
|
|
976
|
-
description=column.get("comment"
|
|
976
|
+
description=column.get("comment"),
|
|
977
977
|
nullable=column["nullable"],
|
|
978
978
|
recursive=False,
|
|
979
979
|
globalTags=gtc,
|
|
@@ -1031,16 +1031,10 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
|
|
|
1031
1031
|
def _get_view_definition(self, inspector: Inspector, schema: str, view: str) -> str:
    """Return the SQL definition of a view as a plain string.

    Args:
        inspector: Inspector used to look up the view definition.
        schema: Schema that contains the view.
        view: Name of the view.

    Returns:
        The definition converted with ``str()``, or ``""`` when the inspector
        returns ``None`` or raises ``NotImplementedError``.
    """
    try:
        view_definition = inspector.get_view_definition(view, schema)
    except NotImplementedError:
        return ""

    # Compare against None explicitly instead of truth-testing: some dialects
    # return a TextClause instead of a raw string, and SQL clause objects do
    # not define a boolean value.
    if view_definition is None:
        return ""
    # Some dialects return a TextClause instead of a raw string, so we need to convert them to a string.
    return str(view_definition)
|
|
1044
1038
|
|
|
1045
1039
|
def _process_view(
|
|
1046
1040
|
self,
|
|
@@ -317,10 +317,10 @@ def resolve_snowflake_modified_type(type_string: str) -> Any:
|
|
|
317
317
|
match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
|
|
318
318
|
if match:
|
|
319
319
|
modified_type_base = match.group(1) # Extract the base type
|
|
320
|
-
return SNOWFLAKE_TYPES_MAP.get(modified_type_base
|
|
320
|
+
return SNOWFLAKE_TYPES_MAP.get(modified_type_base)
|
|
321
321
|
|
|
322
322
|
# Fallback for types without precision/scale
|
|
323
|
-
return SNOWFLAKE_TYPES_MAP.get(type_string
|
|
323
|
+
return SNOWFLAKE_TYPES_MAP.get(type_string)
|
|
324
324
|
|
|
325
325
|
|
|
326
326
|
# see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32
|
|
@@ -180,10 +180,11 @@ def optimized_get_columns(
|
|
|
180
180
|
connection: Connection,
|
|
181
181
|
table_name: str,
|
|
182
182
|
schema: Optional[str] = None,
|
|
183
|
-
tables_cache: MutableMapping[str, List[TeradataTable]] =
|
|
183
|
+
tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
|
|
184
184
|
use_qvci: bool = False,
|
|
185
185
|
**kw: Dict[str, Any],
|
|
186
186
|
) -> List[Dict]:
|
|
187
|
+
tables_cache = tables_cache or {}
|
|
187
188
|
if schema is None:
|
|
188
189
|
schema = self.default_schema_name
|
|
189
190
|
|
|
@@ -314,9 +315,10 @@ def optimized_get_view_definition(
|
|
|
314
315
|
connection: Connection,
|
|
315
316
|
view_name: str,
|
|
316
317
|
schema: Optional[str] = None,
|
|
317
|
-
tables_cache: MutableMapping[str, List[TeradataTable]] =
|
|
318
|
+
tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
|
|
318
319
|
**kw: Dict[str, Any],
|
|
319
320
|
) -> Optional[str]:
|
|
321
|
+
tables_cache = tables_cache or {}
|
|
320
322
|
if schema is None:
|
|
321
323
|
schema = self.default_schema_name
|
|
322
324
|
|
|
@@ -649,7 +651,7 @@ ORDER by DataBaseName, TableName;
|
|
|
649
651
|
)
|
|
650
652
|
|
|
651
653
|
# Disabling the below because the cached view definition is not reliably the view definition: the column in tablesv actually holds the last statement executed against the object, which is not necessarily the view definition.
|
|
652
|
-
# setattr(
|
|
654
|
+
# setattr(
|
|
653
655
|
# TeradataDialect,
|
|
654
656
|
# "get_view_definition",
|
|
655
657
|
# lambda self, connection, view_name, schema=None, **kw: optimized_get_view_definition(
|
|
@@ -746,7 +748,7 @@ ORDER by DataBaseName, TableName;
|
|
|
746
748
|
else:
|
|
747
749
|
raise Exception("Unable to get database name from Sqlalchemy inspector")
|
|
748
750
|
|
|
749
|
-
def cached_loop_tables(
|
|
751
|
+
def cached_loop_tables(
|
|
750
752
|
self,
|
|
751
753
|
inspector: Inspector,
|
|
752
754
|
schema: str,
|
|
@@ -782,7 +784,7 @@ ORDER by DataBaseName, TableName;
|
|
|
782
784
|
break
|
|
783
785
|
return description, properties, location
|
|
784
786
|
|
|
785
|
-
def cached_loop_views(
|
|
787
|
+
def cached_loop_views(
|
|
786
788
|
self,
|
|
787
789
|
inspector: Inspector,
|
|
788
790
|
schema: str,
|
|
@@ -142,7 +142,7 @@ def get_table_comment(self, connection, table_name: str, schema: str = None, **k
|
|
|
142
142
|
if col_value is not None:
|
|
143
143
|
properties[col_name] = col_value
|
|
144
144
|
|
|
145
|
-
return {"text": properties.get("comment"
|
|
145
|
+
return {"text": properties.get("comment"), "properties": properties}
|
|
146
146
|
else:
|
|
147
147
|
return self.get_table_comment_default(connection, table_name, schema)
|
|
148
148
|
except Exception:
|
|
@@ -483,7 +483,7 @@ def _parse_struct_fields(parts):
|
|
|
483
483
|
|
|
484
484
|
|
|
485
485
|
def _parse_basic_datatype(s):
|
|
486
|
-
for sql_type in _all_atomic_types
|
|
486
|
+
for sql_type in _all_atomic_types:
|
|
487
487
|
if isinstance(s, sql_type):
|
|
488
488
|
return {
|
|
489
489
|
"type": _all_atomic_types[sql_type],
|
|
@@ -114,14 +114,10 @@ class StaleEntityRemovalHandler(
|
|
|
114
114
|
self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
|
|
115
115
|
config.stateful_ingestion
|
|
116
116
|
)
|
|
117
|
-
self.checkpointing_enabled: bool = (
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
and self.stateful_ingestion_config
|
|
122
|
-
and self.stateful_ingestion_config.remove_stale_metadata
|
|
123
|
-
)
|
|
124
|
-
else False
|
|
117
|
+
self.checkpointing_enabled: bool = bool(
|
|
118
|
+
self.state_provider.is_stateful_ingestion_configured()
|
|
119
|
+
and self.stateful_ingestion_config
|
|
120
|
+
and self.stateful_ingestion_config.remove_stale_metadata
|
|
125
121
|
)
|
|
126
122
|
self._job_id = self._init_job_id()
|
|
127
123
|
self._urns_to_skip: Set[str] = set()
|