acryl-datahub 1.0.0rc6__py3-none-any.whl → 1.0.0rc8__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.

Note: this version of acryl-datahub has been flagged as a potentially problematic release.
Files changed (74):
  1. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/METADATA +2490 -2490
  2. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/RECORD +74 -74
  3. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/cli/docker_cli.py +1 -1
  6. datahub/cli/iceberg_cli.py +1 -1
  7. datahub/cli/lite_cli.py +4 -2
  8. datahub/cli/specific/dataproduct_cli.py +1 -1
  9. datahub/configuration/git.py +1 -3
  10. datahub/configuration/kafka.py +1 -1
  11. datahub/ingestion/fs/s3_fs.py +2 -2
  12. datahub/ingestion/glossary/classification_mixin.py +1 -1
  13. datahub/ingestion/graph/client.py +16 -7
  14. datahub/ingestion/graph/entity_versioning.py +3 -3
  15. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
  16. datahub/ingestion/source/abs/config.py +2 -4
  17. datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
  18. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +1 -1
  19. datahub/ingestion/source/cassandra/cassandra_api.py +2 -1
  20. datahub/ingestion/source/csv_enricher.py +3 -3
  21. datahub/ingestion/source/dbt/dbt_common.py +1 -1
  22. datahub/ingestion/source/dremio/dremio_api.py +3 -3
  23. datahub/ingestion/source/dremio/dremio_aspects.py +2 -1
  24. datahub/ingestion/source/file.py +5 -2
  25. datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
  26. datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
  27. datahub/ingestion/source/ge_data_profiler.py +11 -14
  28. datahub/ingestion/source/iceberg/iceberg.py +46 -12
  29. datahub/ingestion/source/iceberg/iceberg_common.py +31 -20
  30. datahub/ingestion/source/identity/okta.py +1 -3
  31. datahub/ingestion/source/kafka/kafka.py +1 -1
  32. datahub/ingestion/source/kafka_connect/source_connectors.py +4 -7
  33. datahub/ingestion/source/looker/looker_file_loader.py +2 -2
  34. datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
  35. datahub/ingestion/source/looker/looker_template_language.py +4 -2
  36. datahub/ingestion/source/looker/lookml_source.py +3 -2
  37. datahub/ingestion/source/metabase.py +54 -32
  38. datahub/ingestion/source/metadata/lineage.py +2 -2
  39. datahub/ingestion/source/mode.py +1 -1
  40. datahub/ingestion/source/neo4j/neo4j_source.py +1 -1
  41. datahub/ingestion/source/nifi.py +6 -3
  42. datahub/ingestion/source/openapi_parser.py +2 -2
  43. datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
  44. datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
  45. datahub/ingestion/source/powerbi/powerbi.py +1 -3
  46. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
  47. datahub/ingestion/source/powerbi_report_server/report_server.py +1 -1
  48. datahub/ingestion/source/pulsar.py +2 -2
  49. datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
  50. datahub/ingestion/source/redash.py +2 -1
  51. datahub/ingestion/source/s3/config.py +2 -4
  52. datahub/ingestion/source/s3/source.py +20 -41
  53. datahub/ingestion/source/salesforce.py +1 -1
  54. datahub/ingestion/source/schema_inference/object.py +1 -1
  55. datahub/ingestion/source/sigma/sigma.py +1 -1
  56. datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
  57. datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
  58. datahub/ingestion/source/sql/athena.py +2 -2
  59. datahub/ingestion/source/sql/druid.py +1 -5
  60. datahub/ingestion/source/sql/sql_common.py +2 -2
  61. datahub/ingestion/source/sql/sql_types.py +2 -2
  62. datahub/ingestion/source/sql/teradata.py +4 -2
  63. datahub/ingestion/source/sql/trino.py +2 -2
  64. datahub/ingestion/source/superset.py +65 -37
  65. datahub/ingestion/source/tableau/tableau.py +3 -6
  66. datahub/ingestion/source/tableau/tableau_common.py +2 -1
  67. datahub/lite/duckdb_lite.py +5 -10
  68. datahub/lite/lite_local.py +1 -1
  69. datahub/lite/lite_util.py +4 -3
  70. datahub/sdk/dataset.py +3 -3
  71. datahub/utilities/memory_footprint.py +3 -2
  72. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/LICENSE +0 -0
  73. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/entry_points.txt +0 -0
  74. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc8.dist-info}/top_level.txt +0 -0
@@ -69,9 +69,19 @@ class MetabaseConfig(DatasetLineageProviderConfigBase, StatefulIngestionConfigBa
         default=None,
         description="optional URL to use in links (if `connect_uri` is only for ingestion)",
     )
-    username: Optional[str] = Field(default=None, description="Metabase username.")
+    username: Optional[str] = Field(
+        default=None,
+        description="Metabase username, used when an API key is not provided.",
+    )
     password: Optional[pydantic.SecretStr] = Field(
-        default=None, description="Metabase password."
+        default=None,
+        description="Metabase password, used when an API key is not provided.",
+    )
+
+    # https://www.metabase.com/learn/metabase-basics/administration/administration-and-operation/metabase-api#example-get-request
+    api_key: Optional[pydantic.SecretStr] = Field(
+        default=None,
+        description="Metabase API key. If provided, the username and password will be ignored. Recommended method.",
     )
     # TODO: Check and remove this if no longer needed.
     # Config database_alias is removed from sql sources.
@@ -178,30 +188,40 @@ class MetabaseSource(StatefulIngestionSourceBase):
         self.source_config: MetabaseConfig = config

     def setup_session(self) -> None:
-        login_response = requests.post(
-            f"{self.config.connect_uri}/api/session",
-            None,
-            {
-                "username": self.config.username,
-                "password": (
-                    self.config.password.get_secret_value()
-                    if self.config.password
-                    else None
-                ),
-            },
-        )
+        self.session = requests.session()
+        if self.config.api_key:
+            self.session.headers.update(
+                {
+                    "x-api-key": self.config.api_key.get_secret_value(),
+                    "Content-Type": "application/json",
+                    "Accept": "*/*",
+                }
+            )
+        else:
+            # If no API key is provided, generate a session token using username and password.
+            login_response = requests.post(
+                f"{self.config.connect_uri}/api/session",
+                None,
+                {
+                    "username": self.config.username,
+                    "password": (
+                        self.config.password.get_secret_value()
+                        if self.config.password
+                        else None
+                    ),
+                },
+            )

-        login_response.raise_for_status()
-        self.access_token = login_response.json().get("id", "")
+            login_response.raise_for_status()
+            self.access_token = login_response.json().get("id", "")

-        self.session = requests.session()
-        self.session.headers.update(
-            {
-                "X-Metabase-Session": f"{self.access_token}",
-                "Content-Type": "application/json",
-                "Accept": "*/*",
-            }
-        )
+            self.session.headers.update(
+                {
+                    "X-Metabase-Session": f"{self.access_token}",
+                    "Content-Type": "application/json",
+                    "Accept": "*/*",
+                }
+            )

         # Test the connection
         try:
@@ -217,15 +237,17 @@ class MetabaseSource(StatefulIngestionSourceBase):
             )

     def close(self) -> None:
-        response = requests.delete(
-            f"{self.config.connect_uri}/api/session",
-            headers={"X-Metabase-Session": self.access_token},
-        )
-        if response.status_code not in (200, 204):
-            self.report.report_failure(
-                title="Unable to Log User Out",
-                message=f"Unable to logout for user {self.config.username}",
+        # API key authentication does not require session closure.
+        if not self.config.api_key:
+            response = requests.delete(
+                f"{self.config.connect_uri}/api/session",
+                headers={"X-Metabase-Session": self.access_token},
             )
+            if response.status_code not in (200, 204):
+                self.report.report_failure(
+                    title="Unable to Log User Out",
+                    message=f"Unable to logout for user {self.config.username}",
+                )
         super().close()

     def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]:
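
Note: the two authentication paths added here follow Metabase's documented options: a static x-api-key header, or a session token obtained from /api/session and sent as X-Metabase-Session. A minimal standalone sketch of the same flow, assuming only the requests library, a hypothetical Metabase URL, and a hypothetical build_session helper:

    from typing import Optional

    import requests

    METABASE_URL = "https://metabase.example.com"  # hypothetical instance

    def build_session(
        api_key: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
    ) -> requests.Session:
        session = requests.session()
        if api_key:
            # API-key auth: a static header, no login call and no session to tear down.
            session.headers.update({"x-api-key": api_key})
        else:
            # Username/password auth: exchange credentials for a session token.
            login = requests.post(
                f"{METABASE_URL}/api/session",
                json={"username": username, "password": password},
            )
            login.raise_for_status()
            session.headers.update({"X-Metabase-Session": login.json()["id"]})
        session.headers.update({"Content-Type": "application/json", "Accept": "*/*"})
        return session
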
@@ -104,8 +104,8 @@ class FineGrainedLineageConfig(ConfigModel):

 class EntityNodeConfig(ConfigModel):
     entity: EntityConfig
-    upstream: Optional[List["EntityNodeConfig"]]
-    fineGrainedLineages: Optional[List[FineGrainedLineageConfig]]
+    upstream: Optional[List["EntityNodeConfig"]] = None
+    fineGrainedLineages: Optional[List[FineGrainedLineageConfig]] = None


 # https://pydantic-docs.helpmanual.io/usage/postponed_annotations/ required for when you reference a model within itself
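
Note: ConfigModel here is a pydantic model, so the explicit "= None" defaults added above presumably keep these fields optional under pydantic v2, where a bare Optional[...] annotation no longer implies a default. A small sketch of that behaviour, with a hypothetical model name and pydantic v2 assumed:

    from typing import List, Optional

    from pydantic import BaseModel

    class Node(BaseModel):  # hypothetical model
        name: str
        # Without "= None", pydantic v2 would treat this Optional field as required.
        children: Optional[List[str]] = None

    print(Node(name="leaf"))                  # children defaults to None
    print(Node(name="root", children=["a"]))
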
@@ -1494,7 +1494,7 @@ class ModeSource(StatefulIngestionSourceBase):
                 sleep_time = error_response.headers.get("retry-after")
                 if sleep_time is not None:
                     time.sleep(float(sleep_time))
-                raise HTTPError429
+                raise HTTPError429 from None

             raise http_error

@@ -292,7 +292,7 @@ class Neo4jSource(StatefulIngestionSourceBase):
         return record["properties"]

     def get_relationships(self, record: dict) -> dict:
-        return record.get("relationships", None)
+        return record.get("relationships", {})

     def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
         return [
@@ -1234,11 +1234,14 @@ class NifiSource(StatefulIngestionSourceBase):
         job_type: str,
         description: Optional[str],
         job_properties: Optional[Dict[str, str]] = None,
-        inlets: List[str] = [],
-        outlets: List[str] = [],
-        inputJobs: List[str] = [],
+        inlets: Optional[List[str]] = None,
+        outlets: Optional[List[str]] = None,
+        inputJobs: Optional[List[str]] = None,
         status: Optional[str] = None,
     ) -> Iterable[MetadataWorkUnit]:
+        inlets = inlets or []
+        outlets = outlets or []
+        inputJobs = inputJobs or []
         logger.debug(f"Begining construction of job workunit for {job_urn}")
         if job_properties:
             job_properties = {k: v for k, v in job_properties.items() if v is not None}
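
Note: this change, like the matching ones in the PowerBI, Qlik, Redash, and Teradata hunks below, replaces mutable default arguments with None plus an "x = x or []" guard. A small self-contained illustration of the pitfall being avoided, using hypothetical helper names:

    from typing import List, Optional

    def append_bad(item: int, acc: List[int] = []) -> List[int]:
        # The default list is created once at function definition time and shared
        # across calls, so state leaks from one invocation to the next.
        acc.append(item)
        return acc

    def append_good(item: int, acc: Optional[List[int]] = None) -> List[int]:
        acc = acc or []  # fresh list on every call when none is supplied
        acc.append(item)
        return acc

    assert append_bad(1) == [1]
    assert append_bad(2) == [1, 2]   # surprising: remembers the previous call
    assert append_good(1) == [1]
    assert append_good(2) == [2]     # calls stay independent
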
@@ -167,7 +167,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
     Try to determine if example data is defined for the endpoint, and return it
     """
     data = {}
-    if "content" in base_res.keys():
+    if "content" in base_res:
         res_cont = base_res["content"]
         if "application/json" in res_cont.keys():
             ex_field = None
@@ -188,7 +188,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
         )
     elif "text/csv" in res_cont.keys():
         data = res_cont["text/csv"]["schema"]
-    elif "examples" in base_res.keys():
+    elif "examples" in base_res:
         data = base_res["examples"]["application/json"]

     return data
@@ -2,7 +2,7 @@ import functools
 import importlib.resources as pkg_resource
 import logging
 import os
-from typing import Dict, List
+from typing import Dict, List, Optional

 import lark
 from lark import Lark, Tree
@@ -65,8 +65,9 @@ def get_upstream_tables(
     platform_instance_resolver: AbstractDataPlatformInstanceResolver,
     ctx: PipelineContext,
     config: PowerBiDashboardSourceConfig,
-    parameters: Dict[str, str] = {},
+    parameters: Optional[Dict[str, str]] = None,
 ) -> List[datahub.ingestion.source.powerbi.m_query.data_classes.Lineage]:
+    parameters = parameters or {}
     if table.expression is None:
         logger.debug(f"There is no M-Query expression in table {table.full_name}")
         return []
@@ -70,13 +70,14 @@ def get_first_rule(tree: Tree, rule: str) -> Optional[Tree]:
     return expression_tree


-def token_values(tree: Tree, parameters: Dict[str, str] = {}) -> List[str]:
+def token_values(tree: Tree, parameters: Optional[Dict[str, str]] = None) -> List[str]:
     """
     :param tree: Tree to traverse
     :param parameters: If parameters is not an empty dict, it will try to resolve identifier variable references
                        using the values in 'parameters'.
     :return: List of leaf token data
     """
+    parameters = parameters or {}
     values: List[str] = []

     def internal(node: Union[Tree, Token]) -> None:
@@ -890,9 +890,7 @@ class Mapper:
                         set(user_rights) & set(self.__config.ownership.owner_criteria)
                     )
                     > 0
-                ):
-                    user_mcps.extend(self.to_datahub_user(user))
-                elif self.__config.ownership.owner_criteria is None:
+                ) or self.__config.ownership.owner_criteria is None:
                     user_mcps.extend(self.to_datahub_user(user))
                 else:
                     continue
@@ -380,8 +380,9 @@ class DataResolverBase(ABC):
     def itr_pages(
         self,
         endpoint: str,
-        parameter_override: Dict = {},
+        parameter_override: Optional[Dict] = None,
     ) -> Iterator[List[Dict]]:
+        parameter_override = parameter_override or {}
         params: dict = {
             "$skip": 0,
             "$top": self.TOP,
@@ -196,7 +196,7 @@ class PowerBiReportServerAPI:
         }

         reports: List[Any] = []
-        for report_type in report_types_mapping.keys():
+        for report_type in report_types_mapping:
             report_get_endpoint: str = API_ENDPOINTS[report_type]
             # Replace place holders
             report_get_endpoint_http = report_get_endpoint.format(
@@ -230,8 +230,8 @@ class PulsarSource(StatefulIngestionSourceBase):
             self.report.report_warning("HTTPError", message)
         except requests.exceptions.RequestException as e:
             raise Exception(
-                f"An ambiguous exception occurred while handling the request: {e}"
-            )
+                "An ambiguous exception occurred while handling the request"
+            ) from e

     @classmethod
     def create(cls, config_dict, ctx):
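
Note: the "raise ... from e" above (and in the sigma hunk below), like the "raise ... from None" in the mode and snowflake hunks, changes only how the traceback is chained. A short sketch of what the two forms do:

    def fetch():
        raise ValueError("low-level failure")

    # "from e" keeps the original exception as the explicit cause (__cause__).
    try:
        try:
            fetch()
        except ValueError as e:
            raise RuntimeError("request handling failed") from e
    except RuntimeError as err:
        assert isinstance(err.__cause__, ValueError)

    # "from None" suppresses the implicit "During handling of the above
    # exception, another exception occurred" context in the traceback.
    try:
        try:
            fetch()
        except ValueError:
            raise RuntimeError("request handling failed") from None
    except RuntimeError as err:
        assert err.__cause__ is None and err.__suppress_context__
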
@@ -17,8 +17,9 @@ class WebsocketConnection:
         self.handle = [-1]

     def _build_websocket_request_dict(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
+        params = params or {}
         return {
             "jsonrpc": "2.0",
             "id": self.request_id,
@@ -37,11 +38,12 @@ class WebsocketConnection:
         return {}

     def websocket_send_request(
-        self, method: str, params: Union[Dict, List] = {}
+        self, method: str, params: Optional[Union[Dict, List]] = None
     ) -> Dict:
         """
         Method to send request to websocket
         """
+        params = params or {}
         self.request_id += 1
         request = self._build_websocket_request_dict(method, params)
         response = self._send_request(request=request)
@@ -421,8 +421,9 @@ class RedashSource(StatefulIngestionSourceBase):
         return database_name

     def _get_datasource_urns(
-        self, data_source: Dict, sql_query_data: Dict = {}
+        self, data_source: Dict, sql_query_data: Optional[Dict] = None
     ) -> Optional[List[str]]:
+        sql_query_data = sql_query_data or {}
         platform = self._get_platform_based_on_datasource(data_source)
         database_name = self._get_database_name_based_on_datasource(data_source)
         data_source_syntax = data_source.get("syntax")
@@ -154,10 +154,8 @@ class DataLakeSourceConfig(
         return path_specs

     @pydantic.validator("platform", always=True)
-    def platform_valid(cls, platform: str, values: dict) -> str:
-        inferred_platform = values.get(
-            "platform", None
-        )  # we may have inferred it above
+    def platform_valid(cls, platform: Any, values: dict) -> str:
+        inferred_platform = values.get("platform")  # we may have inferred it above
         platform = platform or inferred_platform
         if not platform:
             raise ValueError("platform must not be empty")
@@ -834,7 +834,7 @@ class S3Source(StatefulIngestionSourceBase):
                 min=min,
             )
             folders.extend(folders_list)
-            if not path_spec.traversal_method == FolderTraversalMethod.ALL:
+            if path_spec.traversal_method != FolderTraversalMethod.ALL:
                 return folders
         if folders:
             return folders
@@ -847,7 +847,7 @@ class S3Source(StatefulIngestionSourceBase):
         path_spec: PathSpec,
         bucket: "Bucket",
         prefix: str,
-    ) -> List[Folder]:
+    ) -> Iterable[Folder]:
         """
         Retrieves all the folders in a path by listing all the files in the prefix.
         If the prefix is a full path then only that folder will be extracted.
@@ -877,51 +877,30 @@ class S3Source(StatefulIngestionSourceBase):
         s3_objects = (
             obj
             for obj in bucket.objects.filter(Prefix=prefix).page_size(PAGE_SIZE)
-            if _is_allowed_path(path_spec, f"s3://{obj.bucket_name}/{obj.key}")
+            if _is_allowed_path(
+                path_spec, self.create_s3_path(obj.bucket_name, obj.key)
+            )
         )
-
-        partitions: List[Folder] = []
         grouped_s3_objects_by_dirname = groupby_unsorted(
             s3_objects,
             key=lambda obj: obj.key.rsplit("/", 1)[0],
         )
-        for key, group in grouped_s3_objects_by_dirname:
-            file_size = 0
-            creation_time = None
-            modification_time = None
-
-            for item in group:
-                file_size += item.size
-                if creation_time is None or item.last_modified < creation_time:
-                    creation_time = item.last_modified
-                if modification_time is None or item.last_modified > modification_time:
-                    modification_time = item.last_modified
-                    max_file = item
-
-            if modification_time is None:
-                logger.warning(
-                    f"Unable to find any files in the folder {key}. Skipping..."
-                )
-                continue
-
-            id = path_spec.get_partition_from_path(
-                self.create_s3_path(max_file.bucket_name, max_file.key)
+        for _, group in grouped_s3_objects_by_dirname:
+            max_file = max(group, key=lambda x: x.last_modified)
+            max_file_s3_path = self.create_s3_path(max_file.bucket_name, max_file.key)
+
+            # If partition_id is None, it means the folder is not a partition
+            partition_id = path_spec.get_partition_from_path(max_file_s3_path)
+
+            yield Folder(
+                partition_id=partition_id,
+                is_partition=bool(partition_id),
+                creation_time=min(obj.last_modified for obj in group),
+                modification_time=max_file.last_modified,
+                sample_file=max_file_s3_path,
+                size=sum(obj.size for obj in group),
             )

-            # If id is None, it means the folder is not a partition
-            partitions.append(
-                Folder(
-                    partition_id=id,
-                    is_partition=bool(id),
-                    creation_time=creation_time if creation_time else None,  # type: ignore[arg-type]
-                    modification_time=modification_time,
-                    sample_file=self.create_s3_path(max_file.bucket_name, max_file.key),
-                    size=file_size,
-                )
-            )
-
-        return partitions
-
     def s3_browser(self, path_spec: PathSpec, sample_size: int) -> Iterable[BrowsePath]:
         if self.source_config.aws_config is None:
             raise ValueError("aws_config not set. Cannot browse s3")
@@ -1000,7 +979,7 @@ class S3Source(StatefulIngestionSourceBase):
                     min=True,
                 )
                 dirs_to_process.append(dirs_to_process_min[0])
-            folders = []
+            folders: List[Folder] = []
             for dir in dirs_to_process:
                 logger.info(f"Getting files from folder: {dir}")
                 prefix_to_process = urlparse(dir).path.lstrip("/")
@@ -615,7 +615,7 @@ class SalesforceSource(StatefulIngestionSourceBase):
             prefix = "\\" if text.startswith("#") else ""
             desc += f"\n\n{prefix}{text}"

-        text = field.get("InlineHelpText", None)
+        text = field.get("InlineHelpText")
         if text:
             prefix = "\\" if text.startswith("#") else ""
             desc += f"\n\n{prefix}{text}"
@@ -149,7 +149,7 @@ def construct_schema(

     extended_schema: Dict[Tuple[str, ...], SchemaDescription] = {}

-    for field_path in schema.keys():
+    for field_path in schema:
         field_types = schema[field_path]["types"]
         field_type: Union[str, type] = "mixed"

@@ -124,7 +124,7 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
         try:
             self.sigma_api = SigmaAPI(self.config, self.reporter)
         except Exception as e:
-            raise ConfigurationError(f"Unable to connect sigma API. Exception: {e}")
+            raise ConfigurationError("Unable to connect sigma API") from e

     @staticmethod
     def test_connection(config_dict: dict) -> TestConnectionReport:
@@ -125,7 +125,7 @@ class SnowflakeConnectionConfig(ConfigModel):

     @pydantic.validator("authentication_type", always=True)
     def authenticator_type_is_valid(cls, v, values):
-        if v not in _VALID_AUTH_TYPES.keys():
+        if v not in _VALID_AUTH_TYPES:
             raise ValueError(
                 f"unsupported authenticator type '{v}' was provided,"
                 f" use one of {list(_VALID_AUTH_TYPES.keys())}"
@@ -312,7 +312,7 @@ class SnowflakeConnectionConfig(ConfigModel):
             raise ValueError(
                 f"access_token not found in response {response}. "
                 "Please check your OAuth configuration."
-            )
+            ) from None
         connect_args = self.get_options()["connect_args"]
         return snowflake.connector.connect(
             user=self.username,
@@ -439,7 +439,7 @@ class SnowflakeV2Source(
                     failure_reason=failure_message,
                 )

-            if c in _report.keys():
+            if c in _report:
                 continue

             # If some capabilities are missing, then mark them as not capable
@@ -396,7 +396,7 @@ class AthenaSource(SQLAlchemySource):
             metadata.table_type if metadata.table_type else ""
         )

-        location: Optional[str] = custom_properties.get("location", None)
+        location: Optional[str] = custom_properties.get("location")
         if location is not None:
             if location.startswith("s3://"):
                 location = make_s3_urn(location, self.config.env)
@@ -538,7 +538,7 @@ class AthenaSource(SQLAlchemySource):
                 column_name=column["name"],
                 column_type=column["type"],
                 inspector=inspector,
-                description=column.get("comment", None),
+                description=column.get("comment"),
                 nullable=column.get("nullable", True),
                 is_part_of_key=(
                     True
@@ -50,11 +50,7 @@ class DruidConfig(BasicSQLAlchemyConfig):
     """

     def get_identifier(self, schema: str, table: str) -> str:
-        return (
-            f"{self.platform_instance}.{table}"
-            if self.platform_instance
-            else f"{table}"
-        )
+        return f"{table}"


 @platform_name("Druid")
@@ -204,7 +204,7 @@ def get_column_type(
     """

     TypeClass: Optional[Type] = None
-    for sql_type in _field_type_mapping.keys():
+    for sql_type in _field_type_mapping:
         if isinstance(column_type, sql_type):
             TypeClass = _field_type_mapping[sql_type]
             break
@@ -973,7 +973,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
                     inspector=inspector,
                 )
             ),
-            description=column.get("comment", None),
+            description=column.get("comment"),
             nullable=column["nullable"],
             recursive=False,
             globalTags=gtc,
@@ -317,10 +317,10 @@ def resolve_snowflake_modified_type(type_string: str) -> Any:
     match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
     if match:
         modified_type_base = match.group(1)  # Extract the base type
-        return SNOWFLAKE_TYPES_MAP.get(modified_type_base, None)
+        return SNOWFLAKE_TYPES_MAP.get(modified_type_base)

     # Fallback for types without precision/scale
-    return SNOWFLAKE_TYPES_MAP.get(type_string, None)
+    return SNOWFLAKE_TYPES_MAP.get(type_string)


 # see https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/sqlalchemy_bigquery/_types.py#L32
@@ -180,10 +180,11 @@ def optimized_get_columns(
     connection: Connection,
     table_name: str,
     schema: Optional[str] = None,
-    tables_cache: MutableMapping[str, List[TeradataTable]] = {},
+    tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
     use_qvci: bool = False,
     **kw: Dict[str, Any],
 ) -> List[Dict]:
+    tables_cache = tables_cache or {}
     if schema is None:
         schema = self.default_schema_name

@@ -314,9 +315,10 @@ def optimized_get_view_definition(
     connection: Connection,
     view_name: str,
     schema: Optional[str] = None,
-    tables_cache: MutableMapping[str, List[TeradataTable]] = {},
+    tables_cache: Optional[MutableMapping[str, List[TeradataTable]]] = None,
     **kw: Dict[str, Any],
 ) -> Optional[str]:
+    tables_cache = tables_cache or {}
     if schema is None:
         schema = self.default_schema_name

@@ -142,7 +142,7 @@ def get_table_comment(self, connection, table_name: str, schema: str = None, **k
                 if col_value is not None:
                     properties[col_name] = col_value

-            return {"text": properties.get("comment", None), "properties": properties}
+            return {"text": properties.get("comment"), "properties": properties}
         else:
             return self.get_table_comment_default(connection, table_name, schema)
     except Exception:
@@ -483,7 +483,7 @@ def _parse_struct_fields(parts):


 def _parse_basic_datatype(s):
-    for sql_type in _all_atomic_types.keys():
+    for sql_type in _all_atomic_types:
         if isinstance(s, sql_type):
             return {
                 "type": _all_atomic_types[sql_type],