acryl-datahub 1.0.0rc6__py3-none-any.whl → 1.0.0rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/METADATA +2520 -2520
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/RECORD +35 -35
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/WHEEL +1 -1
- datahub/_version.py +1 -1
- datahub/cli/docker_cli.py +1 -1
- datahub/cli/iceberg_cli.py +1 -1
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/configuration/kafka.py +1 -1
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/graph/client.py +15 -6
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/source/cassandra/cassandra_api.py +2 -1
- datahub/ingestion/source/csv_enricher.py +2 -2
- datahub/ingestion/source/dremio/dremio_api.py +3 -3
- datahub/ingestion/source/dremio/dremio_aspects.py +2 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/lookml_source.py +1 -1
- datahub/ingestion/source/metabase.py +54 -32
- datahub/ingestion/source/mode.py +1 -1
- datahub/ingestion/source/pulsar.py +2 -2
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_connection.py +1 -1
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/tableau/tableau.py +2 -1
- datahub/ingestion/source/tableau/tableau_common.py +2 -1
- datahub/lite/duckdb_lite.py +2 -1
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/utilities/memory_footprint.py +3 -2
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc6.dist-info → acryl_datahub-1.0.0rc7.dist-info}/top_level.txt +0 -0
|
@@ -69,9 +69,19 @@ class MetabaseConfig(DatasetLineageProviderConfigBase, StatefulIngestionConfigBa
|
|
|
69
69
|
default=None,
|
|
70
70
|
description="optional URL to use in links (if `connect_uri` is only for ingestion)",
|
|
71
71
|
)
|
|
72
|
-
username: Optional[str] = Field(
|
|
72
|
+
username: Optional[str] = Field(
|
|
73
|
+
default=None,
|
|
74
|
+
description="Metabase username, used when an API key is not provided.",
|
|
75
|
+
)
|
|
73
76
|
password: Optional[pydantic.SecretStr] = Field(
|
|
74
|
-
default=None,
|
|
77
|
+
default=None,
|
|
78
|
+
description="Metabase password, used when an API key is not provided.",
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# https://www.metabase.com/learn/metabase-basics/administration/administration-and-operation/metabase-api#example-get-request
|
|
82
|
+
api_key: Optional[pydantic.SecretStr] = Field(
|
|
83
|
+
default=None,
|
|
84
|
+
description="Metabase API key. If provided, the username and password will be ignored. Recommended method.",
|
|
75
85
|
)
|
|
76
86
|
# TODO: Check and remove this if no longer needed.
|
|
77
87
|
# Config database_alias is removed from sql sources.
|
|
@@ -178,30 +188,40 @@ class MetabaseSource(StatefulIngestionSourceBase):
|
|
|
178
188
|
self.source_config: MetabaseConfig = config
|
|
179
189
|
|
|
180
190
|
def setup_session(self) -> None:
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
191
|
+
self.session = requests.session()
|
|
192
|
+
if self.config.api_key:
|
|
193
|
+
self.session.headers.update(
|
|
194
|
+
{
|
|
195
|
+
"x-api-key": self.config.api_key.get_secret_value(),
|
|
196
|
+
"Content-Type": "application/json",
|
|
197
|
+
"Accept": "*/*",
|
|
198
|
+
}
|
|
199
|
+
)
|
|
200
|
+
else:
|
|
201
|
+
# If no API key is provided, generate a session token using username and password.
|
|
202
|
+
login_response = requests.post(
|
|
203
|
+
f"{self.config.connect_uri}/api/session",
|
|
204
|
+
None,
|
|
205
|
+
{
|
|
206
|
+
"username": self.config.username,
|
|
207
|
+
"password": (
|
|
208
|
+
self.config.password.get_secret_value()
|
|
209
|
+
if self.config.password
|
|
210
|
+
else None
|
|
211
|
+
),
|
|
212
|
+
},
|
|
213
|
+
)
|
|
193
214
|
|
|
194
|
-
|
|
195
|
-
|
|
215
|
+
login_response.raise_for_status()
|
|
216
|
+
self.access_token = login_response.json().get("id", "")
|
|
196
217
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
)
|
|
218
|
+
self.session.headers.update(
|
|
219
|
+
{
|
|
220
|
+
"X-Metabase-Session": f"{self.access_token}",
|
|
221
|
+
"Content-Type": "application/json",
|
|
222
|
+
"Accept": "*/*",
|
|
223
|
+
}
|
|
224
|
+
)
|
|
205
225
|
|
|
206
226
|
# Test the connection
|
|
207
227
|
try:
|
|
@@ -217,15 +237,17 @@ class MetabaseSource(StatefulIngestionSourceBase):
|
|
|
217
237
|
)
|
|
218
238
|
|
|
219
239
|
def close(self) -> None:
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
self.report.report_failure(
|
|
226
|
-
title="Unable to Log User Out",
|
|
227
|
-
message=f"Unable to logout for user {self.config.username}",
|
|
240
|
+
# API key authentication does not require session closure.
|
|
241
|
+
if not self.config.api_key:
|
|
242
|
+
response = requests.delete(
|
|
243
|
+
f"{self.config.connect_uri}/api/session",
|
|
244
|
+
headers={"X-Metabase-Session": self.access_token},
|
|
228
245
|
)
|
|
246
|
+
if response.status_code not in (200, 204):
|
|
247
|
+
self.report.report_failure(
|
|
248
|
+
title="Unable to Log User Out",
|
|
249
|
+
message=f"Unable to logout for user {self.config.username}",
|
|
250
|
+
)
|
|
229
251
|
super().close()
|
|
230
252
|
|
|
231
253
|
def emit_dashboard_mces(self) -> Iterable[MetadataWorkUnit]:
|
datahub/ingestion/source/mode.py
CHANGED
|
@@ -1494,7 +1494,7 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
1494
1494
|
sleep_time = error_response.headers.get("retry-after")
|
|
1495
1495
|
if sleep_time is not None:
|
|
1496
1496
|
time.sleep(float(sleep_time))
|
|
1497
|
-
raise HTTPError429
|
|
1497
|
+
raise HTTPError429 from None
|
|
1498
1498
|
|
|
1499
1499
|
raise http_error
|
|
1500
1500
|
|
|
@@ -230,8 +230,8 @@ class PulsarSource(StatefulIngestionSourceBase):
|
|
|
230
230
|
self.report.report_warning("HTTPError", message)
|
|
231
231
|
except requests.exceptions.RequestException as e:
|
|
232
232
|
raise Exception(
|
|
233
|
-
|
|
234
|
-
)
|
|
233
|
+
"An ambiguous exception occurred while handling the request"
|
|
234
|
+
) from e
|
|
235
235
|
|
|
236
236
|
@classmethod
|
|
237
237
|
def create(cls, config_dict, ctx):
|
|
@@ -124,7 +124,7 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
|
|
|
124
124
|
try:
|
|
125
125
|
self.sigma_api = SigmaAPI(self.config, self.reporter)
|
|
126
126
|
except Exception as e:
|
|
127
|
-
raise ConfigurationError(
|
|
127
|
+
raise ConfigurationError("Unable to connect sigma API") from e
|
|
128
128
|
|
|
129
129
|
@staticmethod
|
|
130
130
|
def test_connection(config_dict: dict) -> TestConnectionReport:
|
|
@@ -312,7 +312,7 @@ class SnowflakeConnectionConfig(ConfigModel):
|
|
|
312
312
|
raise ValueError(
|
|
313
313
|
f"access_token not found in response {response}. "
|
|
314
314
|
"Please check your OAuth configuration."
|
|
315
|
-
)
|
|
315
|
+
) from None
|
|
316
316
|
connect_args = self.get_options()["connect_args"]
|
|
317
317
|
return snowflake.connector.connect(
|
|
318
318
|
user=self.username,
|
|
@@ -50,11 +50,7 @@ class DruidConfig(BasicSQLAlchemyConfig):
|
|
|
50
50
|
"""
|
|
51
51
|
|
|
52
52
|
def get_identifier(self, schema: str, table: str) -> str:
|
|
53
|
-
return (
|
|
54
|
-
f"{self.platform_instance}.{table}"
|
|
55
|
-
if self.platform_instance
|
|
56
|
-
else f"{table}"
|
|
57
|
-
)
|
|
53
|
+
return f"{table}"
|
|
58
54
|
|
|
59
55
|
|
|
60
56
|
@platform_name("Druid")
|
|
@@ -1562,8 +1562,9 @@ class TableauSiteSource:
|
|
|
1562
1562
|
query: str,
|
|
1563
1563
|
connection_type: str,
|
|
1564
1564
|
page_size: int,
|
|
1565
|
-
query_filter: dict = {},
|
|
1565
|
+
query_filter: Optional[dict] = None,
|
|
1566
1566
|
) -> Iterable[dict]:
|
|
1567
|
+
query_filter = query_filter or {}
|
|
1567
1568
|
query_filter = optimize_query_filter(query_filter)
|
|
1568
1569
|
|
|
1569
1570
|
# Calls the get_connection_object_page function to get the objects,
|
|
@@ -514,7 +514,8 @@ FIELD_TYPE_MAPPING = {
|
|
|
514
514
|
}
|
|
515
515
|
|
|
516
516
|
|
|
517
|
-
def get_tags_from_params(params: List[str] = []) -> GlobalTagsClass:
|
|
517
|
+
def get_tags_from_params(params: Optional[List[str]] = None) -> GlobalTagsClass:
|
|
518
|
+
params = params or []
|
|
518
519
|
tags = [
|
|
519
520
|
TagAssociationClass(tag=builder.make_tag_urn(tag.upper()))
|
|
520
521
|
for tag in params
|
datahub/lite/duckdb_lite.py
CHANGED
|
@@ -284,9 +284,10 @@ class DuckDBLite(DataHubLiteLocal[DuckDBLiteConfig]):
|
|
|
284
284
|
self,
|
|
285
285
|
query: str,
|
|
286
286
|
flavor: SearchFlavor,
|
|
287
|
-
aspects: List[str] = [],
|
|
287
|
+
aspects: Optional[List[str]] = None,
|
|
288
288
|
snippet: bool = True,
|
|
289
289
|
) -> Iterable[Searchable]:
|
|
290
|
+
aspects = aspects or []
|
|
290
291
|
if flavor == SearchFlavor.FREE_TEXT:
|
|
291
292
|
base_query = f"SELECT distinct(urn), 'urn', NULL from metadata_aspect_v2 where urn ILIKE '%{query}%' UNION SELECT urn, aspect_name, metadata from metadata_aspect_v2 where metadata->>'$.name' ILIKE '%{query}%'"
|
|
292
293
|
for r in self.duckdb_client.execute(base_query).fetchall():
|
datahub/lite/lite_local.py
CHANGED
datahub/lite/lite_util.py
CHANGED
|
@@ -70,9 +70,10 @@ class DataHubLiteWrapper(DataHubLiteLocal):
|
|
|
70
70
|
self,
|
|
71
71
|
query: str,
|
|
72
72
|
flavor: SearchFlavor,
|
|
73
|
-
aspects: List[str] = [],
|
|
73
|
+
aspects: Optional[List[str]] = None,
|
|
74
74
|
snippet: bool = True,
|
|
75
75
|
) -> Iterable[Searchable]:
|
|
76
|
+
aspects = aspects or []
|
|
76
77
|
yield from self.lite.search(query, flavor, aspects, snippet)
|
|
77
78
|
|
|
78
79
|
def ls(self, path: str) -> List[Browseable]:
|
|
@@ -96,10 +97,10 @@ def get_datahub_lite(config_dict: dict, read_only: bool = False) -> "DataHubLite
|
|
|
96
97
|
lite_type = lite_local_config.type
|
|
97
98
|
try:
|
|
98
99
|
lite_class = lite_registry.get(lite_type)
|
|
99
|
-
except KeyError:
|
|
100
|
+
except KeyError as e:
|
|
100
101
|
raise Exception(
|
|
101
102
|
f"Failed to find a registered lite implementation for {lite_type}. Valid values are {[k for k in lite_registry.mapping.keys()]}"
|
|
102
|
-
)
|
|
103
|
+
) from e
|
|
103
104
|
|
|
104
105
|
lite_specific_config = lite_class.get_config_class().parse_obj(
|
|
105
106
|
lite_local_config.config
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from collections import deque
|
|
2
2
|
from itertools import chain
|
|
3
3
|
from sys import getsizeof
|
|
4
|
-
from typing import Any, Iterator
|
|
4
|
+
from typing import Any, Iterator, Optional
|
|
5
5
|
|
|
6
6
|
|
|
7
|
-
def total_size(o: Any, handlers: Any = {}) -> int:
|
|
7
|
+
def total_size(o: Any, handlers: Optional[Any] = None) -> int:
|
|
8
8
|
"""Returns the approximate memory footprint an object and all of its contents.
|
|
9
9
|
Automatically finds the contents of the following builtin containers and
|
|
10
10
|
their subclasses: tuple, list, deque, dict, set and frozenset.
|
|
@@ -14,6 +14,7 @@ def total_size(o: Any, handlers: Any = {}) -> int:
|
|
|
14
14
|
|
|
15
15
|
Based on https://github.com/ActiveState/recipe-577504-compute-mem-footprint/blob/master/recipe.py
|
|
16
16
|
"""
|
|
17
|
+
handlers = handlers or {}
|
|
17
18
|
|
|
18
19
|
def dict_handler(d: dict) -> Iterator[Any]:
|
|
19
20
|
return chain.from_iterable(d.items())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|