acryl-datahub 0.15.0.6rc3__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (204):
  1. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2552 -2523
  2. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +204 -191
  3. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +4 -3
  7. datahub/api/entities/dataset/dataset.py +731 -42
  8. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  9. datahub/cli/check_cli.py +72 -19
  10. datahub/cli/docker_cli.py +3 -3
  11. datahub/cli/iceberg_cli.py +1 -1
  12. datahub/cli/ingest_cli.py +30 -93
  13. datahub/cli/lite_cli.py +4 -2
  14. datahub/cli/specific/dataproduct_cli.py +1 -1
  15. datahub/cli/specific/dataset_cli.py +128 -14
  16. datahub/configuration/common.py +10 -2
  17. datahub/configuration/git.py +1 -3
  18. datahub/configuration/kafka.py +1 -1
  19. datahub/emitter/mce_builder.py +28 -13
  20. datahub/emitter/mcp_builder.py +4 -1
  21. datahub/emitter/response_helper.py +145 -0
  22. datahub/emitter/rest_emitter.py +323 -10
  23. datahub/ingestion/api/decorators.py +1 -1
  24. datahub/ingestion/api/source_helpers.py +4 -0
  25. datahub/ingestion/fs/s3_fs.py +2 -2
  26. datahub/ingestion/glossary/classification_mixin.py +1 -5
  27. datahub/ingestion/graph/client.py +41 -22
  28. datahub/ingestion/graph/entity_versioning.py +3 -3
  29. datahub/ingestion/graph/filters.py +64 -37
  30. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
  31. datahub/ingestion/run/pipeline.py +112 -148
  32. datahub/ingestion/run/sink_callback.py +77 -0
  33. datahub/ingestion/sink/datahub_rest.py +8 -0
  34. datahub/ingestion/source/abs/config.py +2 -4
  35. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  36. datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
  37. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
  38. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
  39. datahub/ingestion/source/cassandra/cassandra.py +152 -233
  40. datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
  41. datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
  42. datahub/ingestion/source/common/subtypes.py +12 -0
  43. datahub/ingestion/source/csv_enricher.py +3 -3
  44. datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
  45. datahub/ingestion/source/dbt/dbt_common.py +3 -5
  46. datahub/ingestion/source/dbt/dbt_tests.py +4 -8
  47. datahub/ingestion/source/delta_lake/config.py +8 -1
  48. datahub/ingestion/source/delta_lake/report.py +4 -2
  49. datahub/ingestion/source/delta_lake/source.py +20 -5
  50. datahub/ingestion/source/dremio/dremio_api.py +4 -8
  51. datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
  52. datahub/ingestion/source/dynamodb/dynamodb.py +1 -0
  53. datahub/ingestion/source/elastic_search.py +26 -6
  54. datahub/ingestion/source/feast.py +27 -8
  55. datahub/ingestion/source/file.py +6 -3
  56. datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
  57. datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
  58. datahub/ingestion/source/ge_data_profiler.py +12 -15
  59. datahub/ingestion/source/iceberg/iceberg.py +46 -12
  60. datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
  61. datahub/ingestion/source/identity/okta.py +37 -7
  62. datahub/ingestion/source/kafka/kafka.py +1 -1
  63. datahub/ingestion/source/kafka_connect/common.py +2 -7
  64. datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
  65. datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
  66. datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
  67. datahub/ingestion/source/looker/looker_common.py +3 -3
  68. datahub/ingestion/source/looker/looker_file_loader.py +2 -2
  69. datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
  70. datahub/ingestion/source/looker/looker_source.py +1 -1
  71. datahub/ingestion/source/looker/looker_template_language.py +4 -2
  72. datahub/ingestion/source/looker/lookml_source.py +3 -2
  73. datahub/ingestion/source/metabase.py +57 -35
  74. datahub/ingestion/source/metadata/business_glossary.py +45 -3
  75. datahub/ingestion/source/metadata/lineage.py +2 -2
  76. datahub/ingestion/source/mlflow.py +365 -35
  77. datahub/ingestion/source/mode.py +18 -8
  78. datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
  79. datahub/ingestion/source/nifi.py +37 -11
  80. datahub/ingestion/source/openapi.py +1 -1
  81. datahub/ingestion/source/openapi_parser.py +49 -17
  82. datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
  83. datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
  84. datahub/ingestion/source/powerbi/powerbi.py +1 -3
  85. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
  86. datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
  87. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
  88. datahub/ingestion/source/preset.py +7 -4
  89. datahub/ingestion/source/pulsar.py +3 -2
  90. datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
  91. datahub/ingestion/source/redash.py +31 -7
  92. datahub/ingestion/source/redshift/config.py +4 -0
  93. datahub/ingestion/source/redshift/datashares.py +236 -0
  94. datahub/ingestion/source/redshift/lineage.py +6 -2
  95. datahub/ingestion/source/redshift/lineage_v2.py +24 -9
  96. datahub/ingestion/source/redshift/profile.py +1 -1
  97. datahub/ingestion/source/redshift/query.py +133 -33
  98. datahub/ingestion/source/redshift/redshift.py +46 -73
  99. datahub/ingestion/source/redshift/redshift_schema.py +186 -6
  100. datahub/ingestion/source/redshift/report.py +3 -0
  101. datahub/ingestion/source/s3/config.py +5 -5
  102. datahub/ingestion/source/s3/source.py +20 -41
  103. datahub/ingestion/source/salesforce.py +550 -275
  104. datahub/ingestion/source/schema_inference/object.py +1 -1
  105. datahub/ingestion/source/sigma/sigma.py +1 -1
  106. datahub/ingestion/source/slack/slack.py +31 -10
  107. datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
  108. datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
  109. datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
  110. datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
  111. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
  112. datahub/ingestion/source/sql/athena.py +10 -16
  113. datahub/ingestion/source/sql/druid.py +1 -5
  114. datahub/ingestion/source/sql/hive.py +15 -6
  115. datahub/ingestion/source/sql/hive_metastore.py +3 -2
  116. datahub/ingestion/source/sql/mssql/job_models.py +29 -0
  117. datahub/ingestion/source/sql/mssql/source.py +11 -5
  118. datahub/ingestion/source/sql/oracle.py +127 -63
  119. datahub/ingestion/source/sql/sql_common.py +6 -12
  120. datahub/ingestion/source/sql/sql_types.py +2 -2
  121. datahub/ingestion/source/sql/teradata.py +7 -5
  122. datahub/ingestion/source/sql/trino.py +2 -2
  123. datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
  124. datahub/ingestion/source/superset.py +222 -62
  125. datahub/ingestion/source/tableau/tableau.py +22 -6
  126. datahub/ingestion/source/tableau/tableau_common.py +3 -2
  127. datahub/ingestion/source/unity/ge_profiler.py +2 -1
  128. datahub/ingestion/source/unity/source.py +11 -1
  129. datahub/ingestion/source/vertexai.py +697 -0
  130. datahub/ingestion/source_config/pulsar.py +3 -1
  131. datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
  132. datahub/lite/duckdb_lite.py +3 -10
  133. datahub/lite/lite_local.py +1 -1
  134. datahub/lite/lite_util.py +4 -3
  135. datahub/metadata/_schema_classes.py +714 -417
  136. datahub/metadata/_urns/urn_defs.py +1673 -1649
  137. datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
  138. datahub/metadata/schema.avsc +16438 -16603
  139. datahub/metadata/schemas/AssertionInfo.avsc +3 -1
  140. datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
  141. datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
  142. datahub/metadata/schemas/ChartInfo.avsc +1 -0
  143. datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
  144. datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
  145. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  146. datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
  147. datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
  148. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
  149. datahub/metadata/schemas/DataProcessKey.avsc +2 -1
  150. datahub/metadata/schemas/DataProductKey.avsc +2 -1
  151. datahub/metadata/schemas/DomainKey.avsc +2 -1
  152. datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
  153. datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
  154. datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
  155. datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
  156. datahub/metadata/schemas/IncidentInfo.avsc +130 -46
  157. datahub/metadata/schemas/InputFields.avsc +3 -1
  158. datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
  159. datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
  160. datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
  161. datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
  162. datahub/metadata/schemas/MLModelKey.avsc +3 -1
  163. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
  164. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
  165. datahub/metadata/schemas/PostKey.avsc +2 -1
  166. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  167. datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
  168. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
  169. datahub/metadata/schemas/VersionProperties.avsc +18 -0
  170. datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
  171. datahub/pydantic/__init__.py +0 -0
  172. datahub/pydantic/compat.py +58 -0
  173. datahub/sdk/__init__.py +30 -12
  174. datahub/sdk/_all_entities.py +1 -1
  175. datahub/sdk/_attribution.py +4 -0
  176. datahub/sdk/_shared.py +251 -16
  177. datahub/sdk/_utils.py +35 -0
  178. datahub/sdk/container.py +29 -5
  179. datahub/sdk/dataset.py +118 -20
  180. datahub/sdk/{_entity.py → entity.py} +24 -1
  181. datahub/sdk/entity_client.py +1 -1
  182. datahub/sdk/main_client.py +23 -0
  183. datahub/sdk/resolver_client.py +17 -29
  184. datahub/sdk/search_client.py +50 -0
  185. datahub/sdk/search_filters.py +374 -0
  186. datahub/specific/dataset.py +3 -4
  187. datahub/sql_parsing/_sqlglot_patch.py +2 -10
  188. datahub/sql_parsing/schema_resolver.py +1 -1
  189. datahub/sql_parsing/split_statements.py +20 -13
  190. datahub/sql_parsing/sql_parsing_common.py +7 -0
  191. datahub/sql_parsing/sqlglot_lineage.py +1 -1
  192. datahub/sql_parsing/sqlglot_utils.py +1 -4
  193. datahub/testing/check_sql_parser_result.py +5 -6
  194. datahub/testing/compare_metadata_json.py +7 -6
  195. datahub/testing/pytest_hooks.py +56 -0
  196. datahub/upgrade/upgrade.py +2 -2
  197. datahub/utilities/file_backed_collections.py +3 -14
  198. datahub/utilities/ingest_utils.py +106 -0
  199. datahub/utilities/mapping.py +1 -1
  200. datahub/utilities/memory_footprint.py +3 -2
  201. datahub/utilities/sentinels.py +22 -0
  202. datahub/utilities/unified_diff.py +5 -1
  203. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
  204. {acryl_datahub-0.15.0.6rc3.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/oracle.py
@@ -1,5 +1,6 @@
 import datetime
 import logging
+import platform
 import re
 
 # This import verifies that the dependencies are available.
@@ -85,6 +86,16 @@ class OracleConfig(BasicSQLAlchemyConfig):
         description="The data dictionary views mode, to extract information about schema objects "
         "('ALL' and 'DBA' views are supported). (https://docs.oracle.com/cd/E11882_01/nav/catalog_views.htm)",
     )
+    # oracledb settings to enable thick mode and client library location
+    enable_thick_mode: Optional[bool] = Field(
+        default=False,
+        description="Connection defaults to thin mode. Set to True to enable thick mode.",
+    )
+    thick_mode_lib_dir: Optional[str] = Field(
+        default=None,
+        description="If using thick mode on Windows or Mac, set thick_mode_lib_dir to the oracle client libraries path. "
+        "On Linux, this value is ignored, as ldconfig or LD_LIBRARY_PATH will define the location.",
+    )
 
     @pydantic.validator("service_name")
     def check_service_name(cls, v, values):
@@ -100,6 +111,18 @@ class OracleConfig(BasicSQLAlchemyConfig):
             raise ValueError("Specify one of data dictionary views mode: 'ALL', 'DBA'.")
         return values
 
+    @pydantic.validator("thick_mode_lib_dir", always=True)
+    def check_thick_mode_lib_dir(cls, v, values):
+        if (
+            v is None
+            and values.get("enable_thick_mode")
+            and (platform.system() == "Darwin" or platform.system() == "Windows")
+        ):
+            raise ValueError(
+                "Specify 'thick_mode_lib_dir' on Mac/Windows when enable_thick_mode is true"
+            )
+        return v
+
     def get_sql_alchemy_url(self):
         url = super().get_sql_alchemy_url()
         if self.service_name:
@@ -129,6 +152,7 @@ class OracleInspectorObjectWrapper:
         self.exclude_tablespaces: Tuple[str, str] = ("SYSTEM", "SYSAUX")
 
     def get_db_name(self) -> str:
+        db_name = None
         try:
             # Try to retrieve current DB name by executing query
             db_name = self._inspector_instance.bind.execute(
@@ -136,7 +160,12 @@
             ).scalar()
             return str(db_name)
         except sqlalchemy.exc.DatabaseError as e:
-            logger.error("Error fetching DB name: " + str(e))
+            self.report.failure(
+                title="Error fetching database name using sys_context.",
+                message="database_fetch_error",
+                context=db_name,
+                exc=e,
+            )
             return ""
 
     def get_schema_names(self) -> List[str]:
@@ -303,8 +332,8 @@
             try:
                 coltype = ischema_names[coltype]()
             except KeyError:
-                logger.warning(
-                    f"Did not recognize type {coltype} of column {colname}"
+                logger.info(
+                    f"Unrecognized column datatype {coltype} of column {colname}"
                 )
                 coltype = sqltypes.NULLTYPE
 
@@ -356,8 +385,8 @@
         COMMENT_SQL = """
             SELECT comments
             FROM dba_tab_comments
-            WHERE table_name = CAST(:table_name AS VARCHAR(128))
-            AND owner = CAST(:schema_name AS VARCHAR(128))
+            WHERE table_name = :table_name
+            AND owner = :schema_name
         """
 
         c = self._inspector_instance.bind.execute(
@@ -374,79 +403,93 @@
 
         text = (
             "SELECT"
-            "\nac.constraint_name,"  # 0
-            "\nac.constraint_type,"  # 1
-            "\nloc.column_name AS local_column,"  # 2
-            "\nrem.table_name AS remote_table,"  # 3
-            "\nrem.column_name AS remote_column,"  # 4
-            "\nrem.owner AS remote_owner,"  # 5
-            "\nloc.position as loc_pos,"  # 6
-            "\nrem.position as rem_pos,"  # 7
-            "\nac.search_condition,"  # 8
-            "\nac.delete_rule"  # 9
-            "\nFROM dba_constraints%(dblink)s ac,"
-            "\ndba_cons_columns%(dblink)s loc,"
-            "\ndba_cons_columns%(dblink)s rem"
-            "\nWHERE ac.table_name = CAST(:table_name AS VARCHAR2(128))"
-            "\nAND ac.constraint_type IN ('R','P', 'U', 'C')"
+            "\nac.constraint_name,"
+            "\nac.constraint_type,"
+            "\nacc.column_name AS local_column,"
+            "\nNULL AS remote_table,"
+            "\nNULL AS remote_column,"
+            "\nNULL AS remote_owner,"
+            "\nacc.position AS loc_pos,"
+            "\nNULL AS rem_pos,"
+            "\nac.search_condition,"
+            "\nac.delete_rule"
+            "\nFROM dba_constraints ac"
+            "\nJOIN dba_cons_columns acc"
+            "\nON ac.owner = acc.owner"
+            "\nAND ac.constraint_name = acc.constraint_name"
+            "\nAND ac.table_name = acc.table_name"
+            "\nWHERE ac.table_name = :table_name"
+            "\nAND ac.constraint_type IN ('P', 'U', 'C')"
         )
 
         if schema is not None:
             params["owner"] = schema
-            text += "\nAND ac.owner = CAST(:owner AS VARCHAR2(128))"
+            text += "\nAND ac.owner = :owner"
 
+        # Splitting into queries with UNION ALL for execution efficiency
         text += (
-            "\nAND ac.owner = loc.owner"
-            "\nAND ac.constraint_name = loc.constraint_name"
-            "\nAND ac.r_owner = rem.owner(+)"
-            "\nAND ac.r_constraint_name = rem.constraint_name(+)"
-            "\nAND (rem.position IS NULL or loc.position=rem.position)"
-            "\nORDER BY ac.constraint_name, loc.position"
+            "\nUNION ALL"
+            "\nSELECT"
+            "\nac.constraint_name,"
+            "\nac.constraint_type,"
+            "\nacc.column_name AS local_column,"
+            "\nac.r_table_name AS remote_table,"
+            "\nrcc.column_name AS remote_column,"
+            "\nac.r_owner AS remote_owner,"
+            "\nacc.position AS loc_pos,"
+            "\nrcc.position AS rem_pos,"
+            "\nac.search_condition,"
+            "\nac.delete_rule"
+            "\nFROM dba_constraints ac"
+            "\nJOIN dba_cons_columns acc"
+            "\nON ac.owner = acc.owner"
+            "\nAND ac.constraint_name = acc.constraint_name"
+            "\nAND ac.table_name = acc.table_name"
+            "\nLEFT JOIN dba_cons_columns rcc"
+            "\nON ac.r_owner = rcc.owner"
+            "\nAND ac.r_constraint_name = rcc.constraint_name"
+            "\nAND acc.position = rcc.position"
+            "\nWHERE ac.table_name = :table_name"
+            "\nAND ac.constraint_type = 'R'"
         )
 
-        text = text % {"dblink": dblink}
+        if schema is not None:
+            text += "\nAND ac.owner = :owner"
+
+        text += "\nORDER BY constraint_name, loc_pos"
+
         rp = self._inspector_instance.bind.execute(sql.text(text), params)
-        constraint_data = rp.fetchall()
-        return constraint_data
+        return rp.fetchall()
 
     def get_pk_constraint(
         self, table_name: str, schema: Optional[str] = None, dblink: str = ""
     ) -> Dict:
-        denormalized_table_name = self._inspector_instance.dialect.denormalize_name(
-            table_name
-        )
-        assert denormalized_table_name
-
-        schema = self._inspector_instance.dialect.denormalize_name(
-            schema or self.default_schema_name
-        )
-
-        if schema is None:
-            schema = self._inspector_instance.dialect.default_schema_name
-
         pkeys = []
         constraint_name = None
-        constraint_data = self._get_constraint_data(
-            denormalized_table_name, schema, dblink
-        )
 
-        for row in constraint_data:
-            (
-                cons_name,
-                cons_type,
-                local_column,
-                remote_table,
-                remote_column,
-                remote_owner,
-            ) = row[0:2] + tuple(
-                [self._inspector_instance.dialect.normalize_name(x) for x in row[2:6]]
-            )
-            if cons_type == "P":
-                if constraint_name is None:
-                    constraint_name = self._inspector_instance.dialect.normalize_name(
-                        cons_name
-                    )
-                pkeys.append(local_column)
+        try:
+            for row in self._get_constraint_data(table_name, schema, dblink):
+                if row[1] == "P":  # constraint_type is 'P' for primary key
+                    if constraint_name is None:
+                        constraint_name = (
+                            self._inspector_instance.dialect.normalize_name(row[0])
+                        )
+                    col_name = self._inspector_instance.dialect.normalize_name(
+                        row[2]
+                    )  # local_column
+                    pkeys.append(col_name)
+        except Exception as e:
+            self.report.warning(
+                title="Failed to Process Primary Keys",
+                message=(
+                    f"Unable to process primary key constraints for {schema}.{table_name}. "
+                    "Ensure SELECT access on DBA_CONSTRAINTS and DBA_CONS_COLUMNS.",
+                ),
+                context=f"{schema}.{table_name}",
+                exc=e,
+            )
+            # Return empty constraint if we can't process it
+            return {"constrained_columns": [], "name": None}
 
         return {"constrained_columns": pkeys, "name": constraint_name}
 
@@ -504,6 +547,16 @@
                 f"dba_cons_columns{dblink} - does the user have "
                 "proper rights to the table?"
             )
+            self.report.warning(
+                title="Missing Table Permissions",
+                message=(
+                    f"Unable to query table_name from dba_cons_columns{dblink}. "
+                    "This usually indicates insufficient permissions on the target table. "
+                    f"Foreign key relationships will not be detected for {schema}.{table_name}. "
+                    "Please ensure the user has SELECT privileges on dba_cons_columns."
+                ),
+                context=f"{schema}.{table_name}",
+            )
 
             rec = fkeys[cons_name]
             rec["name"] = cons_name
@@ -550,8 +603,8 @@
         text = "SELECT text FROM dba_views WHERE view_name=:view_name"
 
         if schema is not None:
-            text += " AND owner = :schema"
-            params["schema"] = schema
+            params["owner"] = schema
+            text += "\nAND owner = :owner"
 
         rp = self._inspector_instance.bind.execute(sql.text(text), params).scalar()
 
@@ -586,6 +639,17 @@ class OracleSource(SQLAlchemySource):
     def __init__(self, config, ctx):
         super().__init__(config, ctx, "oracle")
 
+        # if connecting to oracle with enable_thick_mode, it must be initialized before calling
+        # create_engine, which is called in get_inspectors()
+        # https://python-oracledb.readthedocs.io/en/latest/user_guide/initialization.html#enabling-python-oracledb-thick-mode
+        if self.config.enable_thick_mode:
+            if platform.system() == "Darwin" or platform.system() == "Windows":
+                # windows and mac os require lib_dir to be set explicitly
+                oracledb.init_oracle_client(lib_dir=self.config.thick_mode_lib_dir)
+            else:
+                # linux requires configuring the library path with ldconfig or LD_LIBRARY_PATH
+                oracledb.init_oracle_client()
+
     @classmethod
     def create(cls, config_dict, ctx):
         config = OracleConfig.parse_obj(config_dict)
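
The thick-mode support above is the headline change to the Oracle source. As a rough sketch of how the new fields might be wired up (not taken from the package: the host, credentials, and client-library path below are hypothetical; only enable_thick_mode and thick_mode_lib_dir are the fields added in this release):

    from datahub.ingestion.source.sql.oracle import OracleConfig

    # The validator above rejects this config on Mac/Windows if
    # thick_mode_lib_dir is missing; on Linux the client library is
    # located via ldconfig/LD_LIBRARY_PATH and the value is ignored.
    config = OracleConfig.parse_obj(
        {
            "host_port": "oracle.example.com:1521",  # hypothetical
            "username": "datahub",  # hypothetical
            "password": "example-password",  # hypothetical
            "service_name": "ORCLPDB1",  # hypothetical
            "enable_thick_mode": True,
            "thick_mode_lib_dir": "/opt/oracle/instantclient",  # Mac/Windows only
        }
    )

OracleSource.__init__ then calls oracledb.init_oracle_client() once, before the SQLAlchemy engine is created.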
datahub/ingestion/source/sql/sql_common.py
@@ -204,7 +204,7 @@ def get_column_type(
     """
 
     TypeClass: Optional[Type] = None
-    for sql_type in _field_type_mapping.keys():
+    for sql_type in _field_type_mapping:
         if isinstance(column_type, sql_type):
             TypeClass = _field_type_mapping[sql_type]
             break
@@ -635,7 +635,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
 
         return None
 
-    def loop_tables(  # noqa: C901
+    def loop_tables(
         self,
         inspector: Inspector,
         schema: str,
@@ -973,7 +973,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
                         inspector=inspector,
                     )
                 ),
-                description=column.get("comment", None),
+                description=column.get("comment"),
                 nullable=column["nullable"],
                 recursive=False,
                 globalTags=gtc,
@@ -1031,16 +1031,10 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
     def _get_view_definition(self, inspector: Inspector, schema: str, view: str) -> str:
         try:
             view_definition = inspector.get_view_definition(view, schema)
-            if view_definition is None:
-                view_definition = ""
-            else:
-                # Some dialects return a TextClause instead of a raw string,
-                # so we need to convert them to a string.
-                view_definition = str(view_definition)
+            # Some dialects return a TextClause instead of a raw string, so we need to convert them to a string.
+            return str(view_definition) if view_definition else ""
         except NotImplementedError:
-            view_definition = ""
-
-        return view_definition
+            return ""
 
     def _process_view(
         self,
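
The _get_view_definition rewrite above collapses the None/TextClause handling into a single expression. A minimal sketch of the behavior it preserves, using sqlalchemy.text as a stand-in for a dialect that returns a TextClause:

    from sqlalchemy import text

    view_definition = text("SELECT 1 AS col")  # a TextClause, not a str
    # str() unwraps a TextClause to its SQL; falsy values collapse to ""
    assert (str(view_definition) if view_definition else "") == "SELECT 1 AS col"

    view_definition = None
    assert (str(view_definition) if view_definition else "") == ""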
datahub/ingestion/source/sql/sql_types.py
@@ -317,10 +317,10 @@ def resolve_snowflake_modified_type(type_string: str) -> Any:
     match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
     if match:
         modified_type_base = match.group(1)  # Extract the base type
-        return SNOWFLAKE_TYPES_MAP.get(modified_type_base, None)
+        return SNOWFLAKE_TYPES_MAP.get(modified_type_base)
 
     # Fallback for types without precision/scale
-    return SNOWFLAKE_TYPES_MAP.get(type_string, None)
+    return SNOWFLAKE_TYPES_MAP.get(type_string)
 
 
 # see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32
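
For reference, the regex in resolve_snowflake_modified_type strips a "(precision, scale)" suffix before the map lookup; a quick illustration with a made-up type string:

    import re

    match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", "NUMBER(10, 2)")
    assert match and match.group(1) == "NUMBER"  # base type used for the lookup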
datahub/ingestion/source/sql/teradata.py
@@ -180,10 +180,11 @@ def optimized_get_columns(
     connection: Connection,
     table_name: str,
     schema: Optional[str] = None,
-    tables_cache: MutableMapping[str, List[TeradataTable]] = {},
+    tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
     use_qvci: bool = False,
     **kw: Dict[str, Any],
 ) -> List[Dict]:
+    tables_cache = tables_cache or {}
     if schema is None:
         schema = self.default_schema_name
 
@@ -314,9 +315,10 @@ def optimized_get_view_definition(
     connection: Connection,
     view_name: str,
     schema: Optional[str] = None,
-    tables_cache: MutableMapping[str, List[TeradataTable]] = {},
+    tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
     **kw: Dict[str, Any],
 ) -> Optional[str]:
+    tables_cache = tables_cache or {}
     if schema is None:
         schema = self.default_schema_name
 
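Both signature changes above swap a mutable default argument ({}) for None plus an explicit fallback. A standalone sketch of the pitfall being fixed (toy function names, not from the package):

    def broken(cache={}):  # one dict object is shared by every call
        cache["calls"] = cache.get("calls", 0) + 1
        return cache["calls"]

    def fixed(cache=None):  # fresh dict per call unless one is passed in
        cache = cache or {}
        cache["calls"] = cache.get("calls", 0) + 1
        return cache["calls"]

    assert (broken(), broken()) == (1, 2)  # state leaks across calls
    assert (fixed(), fixed()) == (1, 1)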
@@ -649,7 +651,7 @@ ORDER by DataBaseName, TableName;
         )
 
         # Disabling the below because the cached view definition is not the view definition the column in tablesv actually holds the last statement executed against the object... not necessarily the view definition
-        # setattr(  # noqa: B010
+        # setattr(
         #     TeradataDialect,
         #     "get_view_definition",
         #     lambda self, connection, view_name, schema=None, **kw: optimized_get_view_definition(
@@ -746,7 +748,7 @@ ORDER by DataBaseName, TableName;
         else:
             raise Exception("Unable to get database name from Sqlalchemy inspector")
 
-    def cached_loop_tables(  # noqa: C901
+    def cached_loop_tables(
         self,
         inspector: Inspector,
         schema: str,
@@ -782,7 +784,7 @@ ORDER by DataBaseName, TableName;
                 break
         return description, properties, location
 
-    def cached_loop_views(  # noqa: C901
+    def cached_loop_views(
         self,
         inspector: Inspector,
         schema: str,
datahub/ingestion/source/sql/trino.py
@@ -142,7 +142,7 @@ def get_table_comment(self, connection, table_name: str, schema: str = None, **k
             if col_value is not None:
                 properties[col_name] = col_value
 
-            return {"text": properties.get("comment", None), "properties": properties}
+            return {"text": properties.get("comment"), "properties": properties}
         else:
             return self.get_table_comment_default(connection, table_name, schema)
     except Exception:
@@ -483,7 +483,7 @@ def _parse_struct_fields(parts):
 
 
 def _parse_basic_datatype(s):
-    for sql_type in _all_atomic_types.keys():
+    for sql_type in _all_atomic_types:
         if isinstance(s, sql_type):
             return {
                 "type": _all_atomic_types[sql_type],
datahub/ingestion/source/state/stale_entity_removal_handler.py
@@ -114,14 +114,10 @@ class StaleEntityRemovalHandler(
         self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
            config.stateful_ingestion
         )
-        self.checkpointing_enabled: bool = (
-            True
-            if (
-                self.state_provider.is_stateful_ingestion_configured()
-                and self.stateful_ingestion_config
-                and self.stateful_ingestion_config.remove_stale_metadata
-            )
-            else False
+        self.checkpointing_enabled: bool = bool(
+            self.state_provider.is_stateful_ingestion_configured()
+            and self.stateful_ingestion_config
+            and self.stateful_ingestion_config.remove_stale_metadata
         )
         self._job_id = self._init_job_id()
         self._urns_to_skip: Set[str] = set()
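
The bool(...) form is equivalent to the removed "True if ... else False" ladder, since an and-chain returns one of its operands rather than a bool. A minimal illustration with toy values:

    configured, remove_stale = "state-provider", None

    chained = configured and remove_stale  # returns an operand (None here), not a bool
    assert chained is None
    assert bool(chained) is (True if chained else False)  # both coerce to False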