acryl-datahub 0.15.0.1rc7__py3-none-any.whl → 0.15.0.1rc8__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc7.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/METADATA +2527 -2527
- {acryl_datahub-0.15.0.1rc7.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/RECORD +8 -8
- datahub/__init__.py +1 -1
- datahub/ingestion/source/mode.py +26 -20
- datahub/sql_parsing/tool_meta_extractor.py +4 -1
- {acryl_datahub-0.15.0.1rc7.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc7.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc7.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=Ed_HdiA9eGLLG0fhJKPwruUxl4bgAPR8p2MDlRHqts8,576
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -197,7 +197,7 @@ datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suab
|
|
|
197
197
|
datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
|
|
198
198
|
datahub/ingestion/source/metabase.py,sha256=oemiMdzjfr82Hx6rdwTNBzFM8962LDkosYh7SD_I5cY,31717
|
|
199
199
|
datahub/ingestion/source/mlflow.py,sha256=-yWUuAEVBiNN-elz8Pgn0UeGsC3fVB20z1zKNIr4LXI,12309
|
|
200
|
-
datahub/ingestion/source/mode.py,sha256=
|
|
200
|
+
datahub/ingestion/source/mode.py,sha256=cq1KIpLxuplETF7sUW0hoMQIZG1cgga5BGHP54a28wE,63467
|
|
201
201
|
datahub/ingestion/source/mongodb.py,sha256=vZue4Nz0xaBoCUsQr3_0OIRkWRxeE_IH_Y_QKZ1s7S0,21077
|
|
202
202
|
datahub/ingestion/source/nifi.py,sha256=ttsjZ9aRUvINmewvKFIQD8Rwa4jcl35WFG-F-jPGPWQ,56146
|
|
203
203
|
datahub/ingestion/source/openapi.py,sha256=3ea2ORz1cuq4e7L2hSjxG9Cw3__pVoJ5UNYTJS3EnKU,17386
|
|
@@ -882,7 +882,7 @@ datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf
|
|
|
882
882
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
883
883
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=gUVq3NwZUzQByJs43JZXz8lZf0ZVzVt0FzaW5wZOwK4,47460
|
|
884
884
|
datahub/sql_parsing/sqlglot_utils.py,sha256=n6yufzEGwSlFeCSU540hEldIuab0q8KGqm9x0vSawkc,14699
|
|
885
|
-
datahub/sql_parsing/tool_meta_extractor.py,sha256=
|
|
885
|
+
datahub/sql_parsing/tool_meta_extractor.py,sha256=qEPq8RFWyK0tmSPNlluvd5cxgwbd2v6m9ViSY4hm2QM,6822
|
|
886
886
|
datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
887
887
|
datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
|
|
888
888
|
datahub/telemetry/telemetry.py,sha256=gzla-QGNsynGg2FqFxiDDFQ0emG53MJ9lhOA2-UUg-Y,15047
|
|
@@ -982,8 +982,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
982
982
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
983
983
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
984
984
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
985
|
-
acryl_datahub-0.15.0.
|
|
986
|
-
acryl_datahub-0.15.0.
|
|
987
|
-
acryl_datahub-0.15.0.
|
|
988
|
-
acryl_datahub-0.15.0.
|
|
989
|
-
acryl_datahub-0.15.0.
|
|
985
|
+
acryl_datahub-0.15.0.1rc8.dist-info/METADATA,sha256=mW2V4Czvd-ZE_mUJX8XkNZxNwnBa-gLJxebl0KWsM2A,173642
|
|
986
|
+
acryl_datahub-0.15.0.1rc8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
987
|
+
acryl_datahub-0.15.0.1rc8.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
988
|
+
acryl_datahub-0.15.0.1rc8.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
989
|
+
acryl_datahub-0.15.0.1rc8.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
datahub/ingestion/source/mode.py
CHANGED
|
@@ -5,6 +5,7 @@ import time
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from datetime import datetime, timezone
|
|
7
7
|
from functools import lru_cache
|
|
8
|
+
from json import JSONDecodeError
|
|
8
9
|
from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
|
|
9
10
|
|
|
10
11
|
import dateutil.parser as dp
|
|
@@ -193,6 +194,9 @@ class HTTPError429(HTTPError):
|
|
|
193
194
|
pass
|
|
194
195
|
|
|
195
196
|
|
|
197
|
+
ModeRequestError = (HTTPError, JSONDecodeError)
|
|
198
|
+
|
|
199
|
+
|
|
196
200
|
@dataclass
|
|
197
201
|
class ModeSourceReport(StaleEntityRemovalSourceReport):
|
|
198
202
|
filtered_spaces: LossyList[str] = dataclasses.field(default_factory=LossyList)
|
|
@@ -328,11 +332,11 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
328
332
|
# Test the connection
|
|
329
333
|
try:
|
|
330
334
|
self._get_request_json(f"{self.config.connect_uri}/api/verify")
|
|
331
|
-
except
|
|
335
|
+
except ModeRequestError as e:
|
|
332
336
|
self.report.report_failure(
|
|
333
337
|
title="Failed to Connect",
|
|
334
338
|
message="Unable to verify connection to mode.",
|
|
335
|
-
context=f"Error: {str(
|
|
339
|
+
context=f"Error: {str(e)}",
|
|
336
340
|
)
|
|
337
341
|
|
|
338
342
|
self.workspace_uri = f"{self.config.connect_uri}/api/{self.config.workspace}"
|
|
@@ -521,11 +525,11 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
521
525
|
if self.config.owner_username_instead_of_email
|
|
522
526
|
else user_json.get("email")
|
|
523
527
|
)
|
|
524
|
-
except
|
|
528
|
+
except ModeRequestError as e:
|
|
525
529
|
self.report.report_warning(
|
|
526
530
|
title="Failed to retrieve Mode creator",
|
|
527
531
|
message=f"Unable to retrieve user for {href}",
|
|
528
|
-
context=f"Reason: {str(
|
|
532
|
+
context=f"Reason: {str(e)}",
|
|
529
533
|
)
|
|
530
534
|
return user
|
|
531
535
|
|
|
@@ -571,11 +575,11 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
571
575
|
logging.debug(f"Skipping space {space_name} due to space pattern")
|
|
572
576
|
continue
|
|
573
577
|
space_info[s.get("token", "")] = s.get("name", "")
|
|
574
|
-
except
|
|
578
|
+
except ModeRequestError as e:
|
|
575
579
|
self.report.report_failure(
|
|
576
580
|
title="Failed to Retrieve Spaces",
|
|
577
581
|
message="Unable to retrieve spaces / collections for workspace.",
|
|
578
|
-
context=f"Workspace: {self.workspace_uri}, Error: {str(
|
|
582
|
+
context=f"Workspace: {self.workspace_uri}, Error: {str(e)}",
|
|
579
583
|
)
|
|
580
584
|
|
|
581
585
|
return space_info
|
|
@@ -721,11 +725,11 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
721
725
|
try:
|
|
722
726
|
ds_json = self._get_request_json(f"{self.workspace_uri}/data_sources")
|
|
723
727
|
data_sources = ds_json.get("_embedded", {}).get("data_sources", [])
|
|
724
|
-
except
|
|
728
|
+
except ModeRequestError as e:
|
|
725
729
|
self.report.report_failure(
|
|
726
730
|
title="Failed to retrieve Data Sources",
|
|
727
731
|
message="Unable to retrieve data sources from Mode.",
|
|
728
|
-
context=f"Error: {str(
|
|
732
|
+
context=f"Error: {str(e)}",
|
|
729
733
|
)
|
|
730
734
|
|
|
731
735
|
return data_sources
|
|
@@ -812,11 +816,11 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
812
816
|
if definition.get("name", "") == definition_name:
|
|
813
817
|
return definition.get("source", "")
|
|
814
818
|
|
|
815
|
-
except
|
|
819
|
+
except ModeRequestError as e:
|
|
816
820
|
self.report.report_failure(
|
|
817
821
|
title="Failed to Retrieve Definition",
|
|
818
822
|
message="Unable to retrieve definition from Mode.",
|
|
819
|
-
context=f"Definition Name: {definition_name}, Error: {str(
|
|
823
|
+
context=f"Definition Name: {definition_name}, Error: {str(e)}",
|
|
820
824
|
)
|
|
821
825
|
return None
|
|
822
826
|
|
|
@@ -1382,11 +1386,11 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
1382
1386
|
f"{self.workspace_uri}/spaces/{space_token}/reports"
|
|
1383
1387
|
)
|
|
1384
1388
|
reports = reports_json.get("_embedded", {}).get("reports", {})
|
|
1385
|
-
except
|
|
1389
|
+
except ModeRequestError as e:
|
|
1386
1390
|
self.report.report_failure(
|
|
1387
1391
|
title="Failed to Retrieve Reports for Space",
|
|
1388
1392
|
message="Unable to retrieve reports for space token.",
|
|
1389
|
-
context=f"Space Token: {space_token}, Error: {str(
|
|
1393
|
+
context=f"Space Token: {space_token}, Error: {str(e)}",
|
|
1390
1394
|
)
|
|
1391
1395
|
return reports
|
|
1392
1396
|
|
|
@@ -1400,11 +1404,11 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
1400
1404
|
url = f"{self.workspace_uri}/spaces/{space_token}/datasets"
|
|
1401
1405
|
datasets_json = self._get_request_json(url)
|
|
1402
1406
|
datasets = datasets_json.get("_embedded", {}).get("reports", [])
|
|
1403
|
-
except
|
|
1407
|
+
except ModeRequestError as e:
|
|
1404
1408
|
self.report.report_failure(
|
|
1405
1409
|
title="Failed to Retrieve Datasets for Space",
|
|
1406
1410
|
message=f"Unable to retrieve datasets for space token {space_token}.",
|
|
1407
|
-
context=f"Error: {str(
|
|
1411
|
+
context=f"Error: {str(e)}",
|
|
1408
1412
|
)
|
|
1409
1413
|
return datasets
|
|
1410
1414
|
|
|
@@ -1416,11 +1420,11 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
1416
1420
|
f"{self.workspace_uri}/reports/{report_token}/queries"
|
|
1417
1421
|
)
|
|
1418
1422
|
queries = queries_json.get("_embedded", {}).get("queries", {})
|
|
1419
|
-
except
|
|
1423
|
+
except ModeRequestError as e:
|
|
1420
1424
|
self.report.report_failure(
|
|
1421
1425
|
title="Failed to Retrieve Queries",
|
|
1422
1426
|
message="Unable to retrieve queries for report token.",
|
|
1423
|
-
context=f"Report Token: {report_token}, Error: {str(
|
|
1427
|
+
context=f"Report Token: {report_token}, Error: {str(e)}",
|
|
1424
1428
|
)
|
|
1425
1429
|
return queries
|
|
1426
1430
|
|
|
@@ -1433,11 +1437,11 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
1433
1437
|
f"{self.workspace_uri}/reports/{report_token}/runs/{report_run_id}/query_runs{query_run_id}"
|
|
1434
1438
|
)
|
|
1435
1439
|
queries = queries_json.get("_embedded", {}).get("queries", {})
|
|
1436
|
-
except
|
|
1440
|
+
except ModeRequestError as e:
|
|
1437
1441
|
self.report.report_failure(
|
|
1438
1442
|
title="Failed to Retrieve Queries for Report",
|
|
1439
1443
|
message="Unable to retrieve queries for report token.",
|
|
1440
|
-
context=f"Report Token:{report_token}, Error: {str(
|
|
1444
|
+
context=f"Report Token:{report_token}, Error: {str(e)}",
|
|
1441
1445
|
)
|
|
1442
1446
|
return {}
|
|
1443
1447
|
return queries
|
|
@@ -1451,13 +1455,13 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
1451
1455
|
f"/queries/{query_token}/charts"
|
|
1452
1456
|
)
|
|
1453
1457
|
charts = charts_json.get("_embedded", {}).get("charts", {})
|
|
1454
|
-
except
|
|
1458
|
+
except ModeRequestError as e:
|
|
1455
1459
|
self.report.report_failure(
|
|
1456
1460
|
title="Failed to Retrieve Charts",
|
|
1457
1461
|
message="Unable to retrieve charts from Mode.",
|
|
1458
1462
|
context=f"Report Token: {report_token}, "
|
|
1459
1463
|
f"Query token: {query_token}, "
|
|
1460
|
-
f"Error: {str(
|
|
1464
|
+
f"Error: {str(e)}",
|
|
1461
1465
|
)
|
|
1462
1466
|
return charts
|
|
1463
1467
|
|
|
@@ -1477,6 +1481,8 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
1477
1481
|
response = self.session.get(
|
|
1478
1482
|
url, timeout=self.config.api_options.timeout
|
|
1479
1483
|
)
|
|
1484
|
+
if response.status_code == 204: # No content, don't parse json
|
|
1485
|
+
return {}
|
|
1480
1486
|
return response.json()
|
|
1481
1487
|
except HTTPError as http_error:
|
|
1482
1488
|
error_response = http_error.response
|
|
@@ -40,6 +40,7 @@ def _get_last_line(query: str) -> str:
|
|
|
40
40
|
class ToolMetaExtractorReport(Report):
|
|
41
41
|
num_queries_meta_extracted: Dict[str, int] = field(default_factory=int_top_k_dict)
|
|
42
42
|
failures: List[str] = field(default_factory=list)
|
|
43
|
+
looker_user_mapping_missing: Optional[bool] = None
|
|
43
44
|
|
|
44
45
|
|
|
45
46
|
class ToolMetaExtractor:
|
|
@@ -108,7 +109,9 @@ class ToolMetaExtractor:
|
|
|
108
109
|
PlatformResource.search_by_filters(query=query, graph_client=graph)
|
|
109
110
|
)
|
|
110
111
|
|
|
111
|
-
if len(platform_resources)
|
|
112
|
+
if len(platform_resources) == 0:
|
|
113
|
+
report.looker_user_mapping_missing = True
|
|
114
|
+
elif len(platform_resources) > 1:
|
|
112
115
|
report.failures.append(
|
|
113
116
|
"Looker user metadata extraction failed. Found more than one looker user id mappings."
|
|
114
117
|
)
|
|
File without changes
|
{acryl_datahub-0.15.0.1rc7.dist-info → acryl_datahub-0.15.0.1rc8.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|