acryl-datahub 1.0.0rc6__py3-none-any.whl → 1.0.0rc7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (35)
  1. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/METADATA +2520 -2520
  2. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/RECORD +35 -35
  3. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/cli/docker_cli.py +1 -1
  6. datahub/cli/iceberg_cli.py +1 -1
  7. datahub/cli/lite_cli.py +4 -2
  8. datahub/cli/specific/dataproduct_cli.py +1 -1
  9. datahub/configuration/kafka.py +1 -1
  10. datahub/ingestion/fs/s3_fs.py +2 -2
  11. datahub/ingestion/graph/client.py +15 -6
  12. datahub/ingestion/graph/entity_versioning.py +3 -3
  13. datahub/ingestion/source/cassandra/cassandra_api.py +2 -1
  14. datahub/ingestion/source/csv_enricher.py +2 -2
  15. datahub/ingestion/source/dremio/dremio_api.py +3 -3
  16. datahub/ingestion/source/dremio/dremio_aspects.py +2 -1
  17. datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
  18. datahub/ingestion/source/kafka/kafka.py +1 -1
  19. datahub/ingestion/source/looker/looker_file_loader.py +2 -2
  20. datahub/ingestion/source/looker/lookml_source.py +1 -1
  21. datahub/ingestion/source/metabase.py +54 -32
  22. datahub/ingestion/source/mode.py +1 -1
  23. datahub/ingestion/source/pulsar.py +2 -2
  24. datahub/ingestion/source/sigma/sigma.py +1 -1
  25. datahub/ingestion/source/snowflake/snowflake_connection.py +1 -1
  26. datahub/ingestion/source/sql/druid.py +1 -5
  27. datahub/ingestion/source/tableau/tableau.py +2 -1
  28. datahub/ingestion/source/tableau/tableau_common.py +2 -1
  29. datahub/lite/duckdb_lite.py +2 -1
  30. datahub/lite/lite_local.py +1 -1
  31. datahub/lite/lite_util.py +4 -3
  32. datahub/utilities/memory_footprint.py +3 -2
  33. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/LICENSE +0 -0
  34. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/entry_points.txt +0 -0
  35. {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/top_level.txt +0 -0
@@ -69,9 +69,19 @@ class MetabaseConfig(DatasetLineageProviderConfigBase, StatefulIngestionConfigBa
69
69
  default=None,
70
70
  description="optional URL to use in links (if `connect_uri` is only for ingestion)",
71
71
  )
72
- username: Optional[str] = Field(default=None, description="Metabase username.")
72
+ username: Optional[str] = Field(
73
+ default=None,
74
+ description="Metabase username, used when an API key is not provided.",
75
+ )
73
76
  password: Optional[pydantic.SecretStr] = Field(
74
- default=None, description="Metabase password."
77
+ default=None,
78
+ description="Metabase password, used when an API key is not provided.",
79
+ )
80
+
81
+ # https://www.metabase.com/learn/metabase-basics/administration/administration-and-operation/metabase-api#example-get-request
82
+ api_key: Optional[pydantic.SecretStr] = Field(
83
+ default=None,
84
+ description="Metabase API key. If provided, the username and password will be ignored. Recommended method.",
75
85
  )
76
86
  # TODO: Check and remove this if no longer needed.
77
87
  # Config database_alias is removed from sql sources.
@@ -178,30 +188,40 @@ class MetabaseSource(StatefulIngestionSourceBase):
178
188
  self.source_config: MetabaseConfig = config
179
189
 
180
190
  def setup_session(self) -> None:
181
- login_response = requests.post(
182
- f"{self.config.connect_uri}/api/session",
183
- None,
184
- {
185
- "username": self.config.username,
186
- "password": (
187
- self.config.password.get_secret_value()
188
- if self.config.password
189
- else None
190
- ),
191
- },
192
- )
191
+ self.session = requests.session()
192
+ if self.config.api_key:
193
+ self.session.headers.update(
194
+ {
195
+ "x-api-key": self.config.api_key.get_secret_value(),
196
+ "Content-Type": "application/json",
197
+ "Accept": "*/*",
198
+ }
199
+ )
200
+ else:
201
+ # If no API key is provided, generate a session token using username and password.
202
+ login_response = requests.post(
203
+ f"{self.config.connect_uri}/api/session",
204
+ None,
205
+ {
206
+ "username": self.config.username,
207
+ "password": (
208
+ self.config.password.get_secret_value()
209
+ if self.config.password
210
+ else None
211
+ ),
212
+ },
213
+ )
193
214
 
194
- login_response.raise_for_status()
195
- self.access_token = login_response.json().get("id", "")
215
+ login_response.raise_for_status()
216
+ self.access_token = login_response.json().get("id", "")
196
217
 
197
- self.session = requests.session()
198
- self.session.headers.update(
199
- {
200
- "X-Metabase-Session": f"{self.access_token}",
201
- "Content-Type": "application/json",
202
- "Accept": "*/*",
203
- }
204
- )
218
+ self.session.headers.update(
219
+ {
220
+ "X-Metabase-Session": f"{self.access_token}",
221
+ "Content-Type": "application/json",
222
+ "Accept": "*/*",
223
+ }
224
+ )
205
225
 
206
226
  # Test the connection
207
227
  try:
@@ -217,15 +237,17 @@ class MetabaseSource(StatefulIngestionSourceBase):
217
237
  )
218
238
 
219
239
  def close(self) -> None:
220
- response = requests.delete(
221
- f"{self.config.connect_uri}/api/session",
222
- headers={"X-Metabase-Session": self.access_token},
223
- )
224
- if response.status_code not in (200, 204):
225
- self.report.report_failure(
226
- title="Unable to Log User Out",
227
- message=f"Unable to logout for user {self.config.username}",
240
+ # API key authentication does not require session closure.
241
+ if not self.config.api_key:
242
+ response = requests.delete(
243
+ f"{self.config.connect_uri}/api/session",
244
+ headers={"X-Metabase-Session": self.access_token},
228
245
  )
246
+ if response.status_code not in (200, 204):
247
+ self.report.report_failure(
248
+ title="Unable to Log User Out",
249
+ message=f"Unable to logout for user {self.config.username}",
250
+ )
229
251
  super().close()
230
252
 
231
253
  def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]:
@@ -1494,7 +1494,7 @@ class ModeSource(StatefulIngestionSourceBase):
1494
1494
  sleep_time = error_response.headers.get("retry-after")
1495
1495
  if sleep_time is not None:
1496
1496
  time.sleep(float(sleep_time))
1497
- raise HTTPError429
1497
+ raise HTTPError429 from None
1498
1498
 
1499
1499
  raise http_error
1500
1500
 
@@ -230,8 +230,8 @@ class PulsarSource(StatefulIngestionSourceBase):
230
230
  self.report.report_warning("HTTPError", message)
231
231
  except requests.exceptions.RequestException as e:
232
232
  raise Exception(
233
- f"An ambiguous exception occurred while handling the request: {e}"
234
- )
233
+ "An ambiguous exception occurred while handling the request"
234
+ ) from e
235
235
 
236
236
  @classmethod
237
237
  def create(cls, config_dict, ctx):
@@ -124,7 +124,7 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
124
124
  try:
125
125
  self.sigma_api = SigmaAPI(self.config, self.reporter)
126
126
  except Exception as e:
127
- raise ConfigurationError(f"Unable to connect sigma API. Exception: {e}")
127
+ raise ConfigurationError("Unable to connect sigma API") from e
128
128
 
129
129
  @staticmethod
130
130
  def test_connection(config_dict: dict) -> TestConnectionReport:
@@ -312,7 +312,7 @@ class SnowflakeConnectionConfig(ConfigModel):
312
312
  raise ValueError(
313
313
  f"access_token not found in response {response}. "
314
314
  "Please check your OAuth configuration."
315
- )
315
+ ) from None
316
316
  connect_args = self.get_options()["connect_args"]
317
317
  return snowflake.connector.connect(
318
318
  user=self.username,
@@ -50,11 +50,7 @@ class DruidConfig(BasicSQLAlchemyConfig):
50
50
  """
51
51
 
52
52
  def get_identifier(self, schema: str, table: str) -> str:
53
- return (
54
- f"{self.platform_instance}.{table}"
55
- if self.platform_instance
56
- else f"{table}"
57
- )
53
+ return f"{table}"
58
54
 
59
55
 
60
56
  @platform_name("Druid")
@@ -1562,8 +1562,9 @@ class TableauSiteSource:
1562
1562
  query: str,
1563
1563
  connection_type: str,
1564
1564
  page_size: int,
1565
- query_filter: dict = {},
1565
+ query_filter: Optional[dict] = None,
1566
1566
  ) -> Iterable[dict]:
1567
+ query_filter = query_filter or {}
1567
1568
  query_filter = optimize_query_filter(query_filter)
1568
1569
 
1569
1570
  # Calls the get_connection_object_page function to get the objects,
@@ -514,7 +514,8 @@ FIELD_TYPE_MAPPING = {
514
514
  }
515
515
 
516
516
 
517
- def get_tags_from_params(params: List[str] = []) -> GlobalTagsClass:
517
+ def get_tags_from_params(params: Optional[List[str]] = None) -> GlobalTagsClass:
518
+ params = params or []
518
519
  tags = [
519
520
  TagAssociationClass(tag=builder.make_tag_urn(tag.upper()))
520
521
  for tag in params
@@ -284,9 +284,10 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
284
284
  self,
285
285
  query: str,
286
286
  flavor: SearchFlavor,
287
- aspects: List[str] = [],
287
+ aspects: Optional[List[str]] = None,
288
288
  snippet: bool = True,
289
289
  ) -> Iterable[Searchable]:
290
+ aspects = aspects or []
290
291
  if flavor == SearchFlavor.FREE_TEXT:
291
292
  base_query = f"SELECT distinct(urn), 'urn', NULL from metadata_aspect_v2 where urn ILIKE '%{query}%' UNION SELECT urn, aspect_name, metadata from metadata_aspect_v2 where metadata->>'$.name' ILIKE '%{query}%'"
292
293
  for r in self.duckdb_client.execute(base_query).fetchall():
@@ -90,7 +90,7 @@ class DataHubLiteLocal(Generic[LiteConfig], Closeable, metaclass=ABCMeta):
90
90
  self,
91
91
  query: str,
92
92
  flavor: SearchFlavor,
93
- aspects: List[str] = [],
93
+ aspects: Optional[List[str]] = None,
94
94
  snippet: bool = True,
95
95
  ) -> Iterable[Searchable]:
96
96
  pass
datahub/lite/lite_util.py CHANGED
@@ -70,9 +70,10 @@ class DataHubLiteWrapper(DataHubLiteLocal):
70
70
  self,
71
71
  query: str,
72
72
  flavor: SearchFlavor,
73
- aspects: List[str] = [],
73
+ aspects: Optional[List[str]] = None,
74
74
  snippet: bool = True,
75
75
  ) -> Iterable[Searchable]:
76
+ aspects = aspects or []
76
77
  yield from self.lite.search(query, flavor, aspects, snippet)
77
78
 
78
79
  def ls(self, path: str) -> List[Browseable]:
@@ -96,10 +97,10 @@ def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLite
96
97
  lite_type = lite_local_config.type
97
98
  try:
98
99
  lite_class = lite_registry.get(lite_type)
99
- except KeyError:
100
+ except KeyError as e:
100
101
  raise Exception(
101
102
  f"Failed to find a registered lite implementation for {lite_type}. Valid values are {[k for k in lite_registry.mapping.keys()]}"
102
- )
103
+ ) from e
103
104
 
104
105
  lite_specific_config = lite_class.get_config_class().parse_obj(
105
106
  lite_local_config.config
@@ -1,10 +1,10 @@
1
1
  from collections import deque
2
2
  from itertools import chain
3
3
  from sys import getsizeof
4
- from typing import Any, Iterator
4
+ from typing import Any, Iterator, Optional
5
5
 
6
6
 
7
- def total_size(o: Any, handlers: Any = {}) -> int:
7
+ def total_size(o: Any, handlers: Optional[Any] = None) -> int:
8
8
  """Returns the approximate memory footprint an object and all of its contents.
9
9
  Automatically finds the contents of the following builtin containers and
10
10
  their subclasses: tuple, list, deque, dict, set and frozenset.
@@ -14,6 +14,7 @@ def total_size(o: Any, handlers: Any = {}) -> int:
14
14
 
15
15
  Based on https://github.com/ActiveState/recipe-577504-compute-mem-footprint/blob/master/recipe.py
16
16
  """
17
+ handlers = handlers or {}
17
18
 
18
19
  def dict_handler(d: dict) -> Iterator[Any]:
19
20
  return chain.from_iterable(d.items())