acryl-datahub 1.1.0rc3__py3-none-any.whl → 1.1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic.

Files changed (87)
  1. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/METADATA +2532 -2530
  2. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/RECORD +87 -70
  3. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataset/dataset.py +9 -8
  6. datahub/api/entities/external/__init__.py +0 -0
  7. datahub/api/entities/external/external_entities.py +239 -0
  8. datahub/api/entities/external/external_tag.py +145 -0
  9. datahub/api/entities/external/restricted_text.py +247 -0
  10. datahub/api/entities/external/unity_catalog_external_entites.py +170 -0
  11. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  12. datahub/cli/delete_cli.py +4 -4
  13. datahub/cli/ingest_cli.py +9 -1
  14. datahub/emitter/mce_builder.py +3 -1
  15. datahub/emitter/response_helper.py +86 -1
  16. datahub/emitter/rest_emitter.py +1 -1
  17. datahub/ingestion/graph/client.py +3 -3
  18. datahub/ingestion/source/apply/datahub_apply.py +4 -4
  19. datahub/ingestion/source/data_lake_common/data_lake_utils.py +22 -10
  20. datahub/ingestion/source/data_lake_common/object_store.py +644 -0
  21. datahub/ingestion/source/datahub/config.py +11 -0
  22. datahub/ingestion/source/datahub/datahub_database_reader.py +186 -33
  23. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  24. datahub/ingestion/source/dbt/dbt_common.py +30 -11
  25. datahub/ingestion/source/gcs/gcs_source.py +22 -7
  26. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  27. datahub/ingestion/source/hex/query_fetcher.py +9 -3
  28. datahub/ingestion/source/openapi.py +12 -0
  29. datahub/ingestion/source/openapi_parser.py +56 -37
  30. datahub/ingestion/source/s3/source.py +65 -6
  31. datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
  32. datahub/ingestion/source/snowflake/snowflake_queries.py +44 -21
  33. datahub/ingestion/source/snowflake/snowflake_query.py +0 -7
  34. datahub/ingestion/source/snowflake/snowflake_v2.py +17 -6
  35. datahub/ingestion/source/sql/athena.py +1 -0
  36. datahub/ingestion/source/sql/hive.py +2 -3
  37. datahub/ingestion/source/sql/sql_common.py +98 -34
  38. datahub/ingestion/source/sql/sql_types.py +5 -2
  39. datahub/ingestion/source/unity/config.py +5 -0
  40. datahub/ingestion/source/unity/proxy.py +117 -0
  41. datahub/ingestion/source/unity/source.py +167 -15
  42. datahub/ingestion/source/unity/tag_entities.py +295 -0
  43. datahub/metadata/_internal_schema_classes.py +667 -522
  44. datahub/metadata/_urns/urn_defs.py +1804 -1748
  45. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  46. datahub/metadata/schema.avsc +17358 -17584
  47. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  48. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  49. datahub/metadata/schemas/Applications.avsc +38 -0
  50. datahub/metadata/schemas/ChartKey.avsc +1 -0
  51. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  52. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  53. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  54. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  55. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  56. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  57. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  58. datahub/metadata/schemas/DatasetKey.avsc +1 -0
  59. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  60. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  61. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  62. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  63. datahub/metadata/schemas/MLModelGroupKey.avsc +1 -0
  64. datahub/metadata/schemas/MLModelKey.avsc +1 -0
  65. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  66. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  67. datahub/metadata/schemas/__init__.py +3 -3
  68. datahub/sdk/__init__.py +6 -0
  69. datahub/sdk/_all_entities.py +11 -0
  70. datahub/sdk/_shared.py +118 -1
  71. datahub/sdk/chart.py +315 -0
  72. datahub/sdk/container.py +7 -0
  73. datahub/sdk/dashboard.py +432 -0
  74. datahub/sdk/dataflow.py +309 -0
  75. datahub/sdk/datajob.py +342 -0
  76. datahub/sdk/dataset.py +8 -2
  77. datahub/sdk/entity_client.py +90 -2
  78. datahub/sdk/lineage_client.py +681 -82
  79. datahub/sdk/main_client.py +27 -8
  80. datahub/sdk/mlmodel.py +101 -38
  81. datahub/sdk/mlmodelgroup.py +7 -0
  82. datahub/sql_parsing/sql_parsing_aggregator.py +1 -1
  83. datahub/testing/mce_helpers.py +421 -0
  84. datahub/testing/sdk_v2_helpers.py +18 -0
  85. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/entry_points.txt +0 -0
  86. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/licenses/LICENSE +0 -0
  87. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/sql_common.py
@@ -76,33 +76,36 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
 from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
-from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass
-from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
-from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     BytesTypeClass,
+    DataPlatformInstanceClass,
+    DatasetLineageTypeClass,
+    DatasetPropertiesClass,
+    DatasetSnapshotClass,
     DateTypeClass,
     EnumTypeClass,
-    ForeignKeyConstraint,
-    MySqlDDL,
+    FineGrainedLineageClass,
+    FineGrainedLineageDownstreamTypeClass,
+    FineGrainedLineageUpstreamTypeClass,
+    ForeignKeyConstraintClass,
+    GlobalTagsClass,
+    MetadataChangeEventClass,
+    MySqlDDLClass,
     NullTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
-    SchemaMetadata,
+    SchemaFieldClass,
+    SchemaFieldDataTypeClass,
+    SchemaMetadataClass,
+    StatusClass,
     StringTypeClass,
-    TimeTypeClass,
-)
-from datahub.metadata.schema_classes import (
-    DataPlatformInstanceClass,
-    DatasetLineageTypeClass,
-    DatasetPropertiesClass,
-    GlobalTagsClass,
     SubTypesClass,
     TagAssociationClass,
+    TimeTypeClass,
+    UpstreamClass,
+    UpstreamLineageClass,
     ViewPropertiesClass,
 )
 from datahub.sql_parsing.schema_resolver import SchemaResolver
@@ -112,6 +115,7 @@ from datahub.utilities.registries.domain_registry import DomainRegistry
 from datahub.utilities.sqlalchemy_type_converter import (
     get_native_data_type_for_sqlalchemy_type,
 )
+from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path
 
 if TYPE_CHECKING:
     from datahub.ingestion.source.ge_data_profiler import (
@@ -198,7 +202,7 @@ def make_sqlalchemy_type(name: str) -> Type[TypeEngine]:
 
 def get_column_type(
     sql_report: SQLSourceReport, dataset_name: str, column_type: Any
-) -> SchemaFieldDataType:
+) -> SchemaFieldDataTypeClass:
     """
     Maps SQLAlchemy types (https://docs.sqlalchemy.org/en/13/core/type_basics.html) to corresponding schema types
     """
@@ -223,7 +227,7 @@ def get_column_type(
     )
     TypeClass = NullTypeClass
 
-    return SchemaFieldDataType(type=TypeClass())
+    return SchemaFieldDataTypeClass(type=TypeClass())
 
 
 def get_schema_metadata(
@@ -232,10 +236,10 @@ def get_schema_metadata(
     platform: str,
     columns: List[dict],
     pk_constraints: Optional[dict] = None,
-    foreign_keys: Optional[List[ForeignKeyConstraint]] = None,
-    canonical_schema: Optional[List[SchemaField]] = None,
+    foreign_keys: Optional[List[ForeignKeyConstraintClass]] = None,
+    canonical_schema: Optional[List[SchemaFieldClass]] = None,
     simplify_nested_field_paths: bool = False,
-) -> SchemaMetadata:
+) -> SchemaMetadataClass:
     if (
         simplify_nested_field_paths
         and canonical_schema is not None
@@ -243,12 +247,12 @@
     ):
         canonical_schema = downgrade_schema_from_v2(canonical_schema)
 
-    schema_metadata = SchemaMetadata(
+    schema_metadata = SchemaMetadataClass(
         schemaName=dataset_name,
         platform=make_data_platform_urn(platform),
         version=0,
         hash="",
-        platformSchema=MySqlDDL(tableSchema=""),
+        platformSchema=MySqlDDLClass(tableSchema=""),
         fields=canonical_schema or [],
     )
     if foreign_keys is not None and foreign_keys != []:
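
The hunks above swap the unsuffixed pegasus2avro names for the equivalent `*Class` types from `datahub.metadata.schema_classes`. A minimal sketch of what `get_schema_metadata` now builds, using an illustrative dataset, platform, and column name:

```python
from datahub.emitter.mce_builder import make_data_platform_urn
from datahub.metadata.schema_classes import (
    MySqlDDLClass,
    NumberTypeClass,
    SchemaFieldClass,
    SchemaFieldDataTypeClass,
    SchemaMetadataClass,
)

# Hypothetical table with a single numeric column, mirroring the
# construction in get_schema_metadata() after the rename.
schema_metadata = SchemaMetadataClass(
    schemaName="sales.public.orders",  # illustrative dataset name
    platform=make_data_platform_urn("mysql"),
    version=0,
    hash="",
    platformSchema=MySqlDDLClass(tableSchema=""),
    fields=[
        SchemaFieldClass(
            fieldPath="order_id",
            type=SchemaFieldDataTypeClass(type=NumberTypeClass()),
            nativeDataType="BIGINT",
        )
    ],
)
```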
@@ -590,7 +594,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         schema: str,
         fk_dict: Dict[str, str],
         inspector: Inspector,
-    ) -> ForeignKeyConstraint:
+    ) -> ForeignKeyConstraintClass:
         referred_schema: Optional[str] = fk_dict.get("referred_schema")
 
         if not referred_schema:
@@ -617,7 +621,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             for f in fk_dict["referred_columns"]
         ]
 
-        return ForeignKeyConstraint(
+        return ForeignKeyConstraintClass(
             fk_dict["name"], foreign_fields, source_fields, foreign_dataset
         )
 
@@ -714,7 +718,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             self.config.platform_instance,
             self.config.env,
         )
-        dataset_snapshot = DatasetSnapshot(
+        dataset_snapshot = DatasetSnapshotClass(
            urn=dataset_urn,
            aspects=[StatusClass(removed=False)],
         )
@@ -742,6 +746,30 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             tags=extra_tags,
             partition_keys=partitions,
         )
+
+        if self.config.include_table_location_lineage and location_urn:
+            self.aggregator.add_known_lineage_mapping(
+                upstream_urn=location_urn,
+                downstream_urn=dataset_snapshot.urn,
+                lineage_type=DatasetLineageTypeClass.COPY,
+            )
+            external_upstream_table = UpstreamClass(
+                dataset=location_urn,
+                type=DatasetLineageTypeClass.COPY,
+            )
+
+            yield MetadataChangeProposalWrapper(
+                entityUrn=dataset_snapshot.urn,
+                aspect=UpstreamLineageClass(
+                    upstreams=[external_upstream_table],
+                    fineGrainedLineages=self.get_fine_grained_lineages(
+                        dataset_urn=dataset_snapshot.urn,
+                        upstream_dataset_urn=location_urn,
+                        schema_fields=schema_fields,
+                    ),
+                ),
+            ).as_workunit()
+
         schema_metadata = get_schema_metadata(
             self.report,
             dataset_name,
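
The new block above emits table-level lineage from the storage location to the table whenever `include_table_location_lineage` is enabled and a location URN is known. A minimal sketch of the equivalent standalone aspect, with made-up S3 and Hive URNs:

```python
from datahub.emitter.mce_builder import make_dataset_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
    DatasetLineageTypeClass,
    UpstreamClass,
    UpstreamLineageClass,
)

# Illustrative URNs only: a Hive table copied from an S3 location.
location_urn = make_dataset_urn("s3", "my-bucket/warehouse/orders", "PROD")
table_urn = make_dataset_urn("hive", "warehouse.orders", "PROD")

mcp = MetadataChangeProposalWrapper(
    entityUrn=table_urn,
    aspect=UpstreamLineageClass(
        upstreams=[
            UpstreamClass(dataset=location_urn, type=DatasetLineageTypeClass.COPY)
        ]
    ),
)
```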
@@ -762,7 +790,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         yield from self.add_table_to_schema_container(
             dataset_urn=dataset_urn, db_name=db_name, schema=schema
         )
-        mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
+        mce = MetadataChangeEventClass(proposedSnapshot=dataset_snapshot)
         yield SqlWorkUnit(id=dataset_name, mce=mce)
         dpi_aspect = self.get_dataplatform_instance_aspect(dataset_urn=dataset_urn)
         if dpi_aspect:
@@ -797,7 +825,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         schema: str,
         table: str,
         data_reader: Optional[DataReader],
-        schema_metadata: SchemaMetadata,
+        schema_metadata: SchemaMetadataClass,
     ) -> None:
         try:
             if (
@@ -908,7 +936,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
 
     def _get_foreign_keys(
         self, dataset_urn: str, inspector: Inspector, schema: str, table: str
-    ) -> List[ForeignKeyConstraint]:
+    ) -> List[ForeignKeyConstraintClass]:
         try:
             foreign_keys = [
                 self.get_foreign_key_metadata(dataset_urn, schema, fk_rec, inspector)
@@ -922,6 +950,42 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             foreign_keys = []
         return foreign_keys
 
+    def get_fine_grained_lineages(
+        self,
+        dataset_urn: str,
+        upstream_dataset_urn: str,
+        schema_fields: List[SchemaFieldClass],
+    ) -> Optional[List[FineGrainedLineageClass]]:
+        fine_grained_lineages: List[FineGrainedLineageClass] = []
+
+        for schema_field in schema_fields:
+            try:
+                field_path_v1 = get_simple_field_path_from_v2_field_path(
+                    schema_field.fieldPath
+                )
+                fine_grained_lineages.append(
+                    FineGrainedLineageClass(
+                        downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
+                        downstreams=[make_schema_field_urn(dataset_urn, field_path_v1)],
+                        upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
+                        upstreams=[
+                            make_schema_field_urn(
+                                upstream_dataset_urn,
+                                get_simple_field_path_from_v2_field_path(
+                                    schema_field.fieldPath
+                                ),
+                            )
+                        ],
+                    )
+                )
+            except Exception as e:
+                logger.warning(
+                    f"Error processing field path for {dataset_urn}: {str(e)}"
+                )
+                continue
+
+        return fine_grained_lineages if fine_grained_lineages else None
+
     def get_schema_fields(
         self,
         dataset_name: str,
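
The new `get_fine_grained_lineages` helper maps each downstream column to the same-named field on the upstream location dataset, first simplifying v2 field paths to plain column names. A sketch of a single entry it would produce, with illustrative URNs and an assumed `order_id` column:

```python
from datahub.emitter.mce_builder import make_schema_field_urn
from datahub.metadata.schema_classes import (
    FineGrainedLineageClass,
    FineGrainedLineageDownstreamTypeClass,
    FineGrainedLineageUpstreamTypeClass,
)

# Illustrative URNs: one table column mapped to the matching field upstream.
downstream_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,warehouse.orders,PROD)"
upstream_urn = "urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket/warehouse/orders,PROD)"

entry = FineGrainedLineageClass(
    downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
    downstreams=[make_schema_field_urn(downstream_urn, "order_id")],
    upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
    upstreams=[make_schema_field_urn(upstream_urn, "order_id")],
)
```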
@@ -930,7 +994,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         pk_constraints: Optional[dict] = None,
         partition_keys: Optional[List[str]] = None,
         tags: Optional[Dict[str, List[str]]] = None,
-    ) -> List[SchemaField]:
+    ) -> List[SchemaFieldClass]:
         canonical_schema = []
         for column in columns:
             column_tags: Optional[List[str]] = None
@@ -955,14 +1019,14 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         pk_constraints: Optional[dict] = None,
         partition_keys: Optional[List[str]] = None,
         tags: Optional[List[str]] = None,
-    ) -> List[SchemaField]:
+    ) -> List[SchemaFieldClass]:
         gtc: Optional[GlobalTagsClass] = None
         if tags:
             tags_str = [make_tag_urn(t) for t in tags]
             tags_tac = [TagAssociationClass(t) for t in tags_str]
             gtc = GlobalTagsClass(tags_tac)
         full_type = column.get("full_type")
-        field = SchemaField(
+        field = SchemaFieldClass(
             fieldPath=column["name"],
             type=get_column_type(self.report, dataset_name, column["type"]),
             nativeDataType=(
@@ -1092,7 +1156,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             default_schema=default_schema,
         )
 
-        dataset_snapshot = DatasetSnapshot(
+        dataset_snapshot = DatasetSnapshotClass(
             urn=dataset_urn,
             aspects=[StatusClass(removed=False)],
         )
@@ -1111,7 +1175,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         dataset_snapshot.aspects.append(dataset_properties)
         if schema_metadata:
             dataset_snapshot.aspects.append(schema_metadata)
-        mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
+        mce = MetadataChangeEventClass(proposedSnapshot=dataset_snapshot)
         yield SqlWorkUnit(id=dataset_name, mce=mce)
         dpi_aspect = self.get_dataplatform_instance_aspect(dataset_urn=dataset_urn)
         if dpi_aspect:
datahub/ingestion/source/sql/sql_types.py
@@ -284,6 +284,8 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
     "INTEGER": NumberType,
     "BIGINT": NumberType,
     "SMALLINT": NumberType,
+    "TINYINT": NumberType,
+    "BYTEINT": NumberType,
     "FLOAT": NumberType,
     "FLOAT4": NumberType,
     "FLOAT8": NumberType,
@@ -291,6 +293,7 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
     "DOUBLE PRECISION": NumberType,
     "REAL": NumberType,
     "VARCHAR": StringType,
+    "CHARACTER VARYING": StringType,
     "CHAR": StringType,
     "CHARACTER": StringType,
     "STRING": StringType,
@@ -313,8 +316,8 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
 
 
 def resolve_snowflake_modified_type(type_string: str) -> Any:
-    # Match types with precision and scale, e.g., 'DECIMAL(38,0)'
-    match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
+    # Match types with precision and scale, e.g., 'DECIMAL(38,0)' or TIME(3)
+    match = re.match(r"([a-z A-Z_]+)\(\d+(,(\s+)?\d+)?\)", type_string)
     if match:
         modified_type_base = match.group(1)  # Extract the base type
         return SNOWFLAKE_TYPES_MAP.get(modified_type_base)
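
The old pattern only matched types carrying both a precision and a scale separated by exactly one whitespace character. The widened pattern makes the scale optional, tolerates optional whitespace after the comma, and allows spaces inside the base type name. A quick check of the new pattern against a few illustrative type strings:

```python
import re

NEW_PATTERN = r"([a-z A-Z_]+)\(\d+(,(\s+)?\d+)?\)"

# Strings the widened pattern now accepts; the extracted base type is
# what gets looked up in SNOWFLAKE_TYPES_MAP.
for type_string in ["DECIMAL(38,0)", "NUMBER(10, 2)", "TIME(3)", "DOUBLE PRECISION(10,2)"]:
    match = re.match(NEW_PATTERN, type_string)
    print(type_string, "->", match.group(1) if match else None)
# DECIMAL(38,0) -> DECIMAL
# NUMBER(10, 2) -> NUMBER
# TIME(3) -> TIME
# DOUBLE PRECISION(10,2) -> DOUBLE PRECISION
```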
datahub/ingestion/source/unity/config.py
@@ -229,6 +229,11 @@ class UnityCatalogSourceConfig(
         description="Option to enable/disable ownership generation for metastores, catalogs, schemas, and tables.",
     )
 
+    include_tags: bool = pydantic.Field(
+        default=True,
+        description="Option to enable/disable column/table tag extraction.",
+    )
+
     _rename_table_ownership = pydantic_renamed_field(
         "include_table_ownership", "include_ownership"
     )
datahub/ingestion/source/unity/proxy.py
@@ -8,6 +8,8 @@ from datetime import datetime
 from typing import Any, Dict, Iterable, List, Optional, Union, cast
 from unittest.mock import patch
 
+import cachetools
+from cachetools import cached
 from databricks.sdk import WorkspaceClient
 from databricks.sdk.service.catalog import (
     CatalogInfo,
@@ -25,8 +27,10 @@ from databricks.sdk.service.sql import (
     QueryStatus,
 )
 from databricks.sdk.service.workspace import ObjectType
+from databricks.sql import connect
 
 from datahub._version import nice_version_name
+from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
 from datahub.emitter.mce_builder import parse_ts_millis
 from datahub.ingestion.source.unity.hive_metastore_proxy import HiveMetastoreProxy
 from datahub.ingestion.source.unity.proxy_profiling import (
@@ -108,6 +112,13 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
         self.warehouse_id = warehouse_id or ""
         self.report = report
         self.hive_metastore_proxy = hive_metastore_proxy
+        self._sql_connection_params = {
+            "server_hostname": self._workspace_client.config.host.replace(
+                "https://", ""
+            ),
+            "http_path": f"/sql/1.0/warehouses/{self.warehouse_id}",
+            "access_token": self._workspace_client.config.token,
+        }
 
     def check_basic_connectivity(self) -> bool:
         return bool(self._workspace_client.catalogs.list(include_browse=True))
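
The constructor hunk above derives databricks-sql connection parameters from the WorkspaceClient configuration (host without the https:// scheme, the warehouse HTTP path, and the access token); the `_execute_sql_query` helper in the next hunk opens a short-lived connection per query. A minimal standalone sketch of that connect/cursor pattern, with placeholder workspace details:

```python
from databricks.sql import connect

# Placeholder connection details; the proxy derives these from
# WorkspaceClient.config and the configured SQL warehouse id.
params = {
    "server_hostname": "adb-1234567890123456.7.azuredatabricks.net",
    "http_path": "/sql/1.0/warehouses/abcdef1234567890",
    "access_token": "<personal-access-token>",
}

with connect(**params) as connection, connection.cursor() as cursor:
    cursor.execute("SELECT * FROM my_catalog.information_schema.table_tags")
    rows = cursor.fetchall()
```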
@@ -492,3 +503,109 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
             executed_as_user_id=info.executed_as_user_id,
             executed_as_user_name=info.executed_as_user_name,
         )
+
+    def _execute_sql_query(self, query: str) -> List[List[str]]:
+        """Execute SQL query using databricks-sql connector for better performance"""
+        try:
+            with connect(
+                **self._sql_connection_params
+            ) as connection, connection.cursor() as cursor:
+                cursor.execute(query)
+                return cursor.fetchall()
+
+        except Exception as e:
+            logger.warning(f"Failed to execute SQL query: {e}")
+            return []
+
+    @cached(cachetools.FIFOCache(maxsize=100))
+    def get_schema_tags(self, catalog: str) -> Dict[str, List[UnityCatalogTag]]:
+        """Optimized version using databricks-sql"""
+        logger.info(f"Fetching schema tags for catalog: {catalog}")
+
+        query = f"SELECT * FROM {catalog}.information_schema.schema_tags"
+        rows = self._execute_sql_query(query)
+
+        result_dict: Dict[str, List[UnityCatalogTag]] = {}
+
+        for row in rows:
+            catalog_name, schema_name, tag_name, tag_value = row
+            schema_key = f"{catalog_name}.{schema_name}"
+
+            if schema_key not in result_dict:
+                result_dict[schema_key] = []
+
+            result_dict[schema_key].append(
+                UnityCatalogTag(key=tag_name, value=tag_value)
+            )
+
+        return result_dict
+
+    @cached(cachetools.FIFOCache(maxsize=100))
+    def get_catalog_tags(self, catalog: str) -> Dict[str, List[UnityCatalogTag]]:
+        """Optimized version using databricks-sql"""
+        logger.info(f"Fetching table tags for catalog: {catalog}")
+
+        query = f"SELECT * FROM {catalog}.information_schema.catalog_tags"
+        rows = self._execute_sql_query(query)
+
+        result_dict: Dict[str, List[UnityCatalogTag]] = {}
+
+        for row in rows:
+            catalog_name, tag_name, tag_value = row
+
+            if catalog_name not in result_dict:
+                result_dict[catalog_name] = []
+
+            result_dict[catalog_name].append(
+                UnityCatalogTag(key=tag_name, value=tag_value)
+            )
+
+        return result_dict
+
+    @cached(cachetools.FIFOCache(maxsize=100))
+    def get_table_tags(self, catalog: str) -> Dict[str, List[UnityCatalogTag]]:
+        """Optimized version using databricks-sql"""
+        logger.info(f"Fetching table tags for catalog: {catalog}")
+
+        query = f"SELECT * FROM {catalog}.information_schema.table_tags"
+        rows = self._execute_sql_query(query)
+
+        result_dict: Dict[str, List[UnityCatalogTag]] = {}
+
+        for row in rows:
+            catalog_name, schema_name, table_name, tag_name, tag_value = row
+            table_key = f"{catalog_name}.{schema_name}.{table_name}"
+
+            if table_key not in result_dict:
+                result_dict[table_key] = []
+
+            result_dict[table_key].append(
+                UnityCatalogTag(key=tag_name, value=tag_value if tag_value else None)
+            )
+
+        return result_dict
+
+    @cached(cachetools.FIFOCache(maxsize=100))
+    def get_column_tags(self, catalog: str) -> Dict[str, List[UnityCatalogTag]]:
+        """Optimized version using databricks-sql"""
+        logger.info(f"Fetching column tags for catalog: {catalog}")
+
+        query = f"SELECT * FROM {catalog}.information_schema.column_tags"
+        rows = self._execute_sql_query(query)
+
+        result_dict: Dict[str, List[UnityCatalogTag]] = {}
+
+        for row in rows:
+            catalog_name, schema_name, table_name, column_name, tag_name, tag_value = (
+                row
+            )
+            column_key = f"{catalog_name}.{schema_name}.{table_name}.{column_name}"
+
+            if column_key not in result_dict:
+                result_dict[column_key] = []
+
+            result_dict[column_key].append(
+                UnityCatalogTag(key=tag_name, value=tag_value if tag_value else None)
+            )
+
+        return result_dict
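
Each of the four tag lookups above is memoized per catalog with a FIFO cache of up to 100 entries, so a catalog's information_schema is queried at most once per tag kind while its entry remains cached. A minimal sketch of that cachetools pattern on a free function (the query result is faked here); on the proxy these are instance methods, so the cache key includes the proxy instance as well as the catalog name:

```python
import cachetools
from cachetools import cached

call_count = 0

# First call per catalog runs the (faked) query; later calls are served
# from the FIFO cache keyed on the function arguments.
@cached(cachetools.FIFOCache(maxsize=100))
def get_table_tags(catalog: str) -> dict:
    global call_count
    call_count += 1
    return {f"{catalog}.sales.orders": [("pii", "true")]}  # stand-in for SQL rows

get_table_tags("main")
get_table_tags("main")
assert call_count == 1  # second call hit the cache
```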