acryl-datahub 0.15.0.1rc7__py3-none-any.whl → 0.15.0.1rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=dj0h5Hq8a33nXbLNFmlqql5K3OaWumjRX8IsgKQUCfs,576
1
+ datahub/__init__.py,sha256=Ed_HdiA9eGLLG0fhJKPwruUxl4bgAPR8p2MDlRHqts8,576
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -197,7 +197,7 @@ datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suab
197
197
  datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
198
198
  datahub/ingestion/source/metabase.py,sha256=oemiMdzjfr82Hx6rdwTNBzFM8962LDkosYh7SD_I5cY,31717
199
199
  datahub/ingestion/source/mlflow.py,sha256=-yWUuAEVBiNN-elz8Pgn0UeGsC3fVB20z1zKNIr4LXI,12309
200
- datahub/ingestion/source/mode.py,sha256=fuDTByENqcbxViFyYjU70B86FyAYr3Pk9usIBI0Vl1U,63384
200
+ datahub/ingestion/source/mode.py,sha256=cq1KIpLxuplETF7sUW0hoMQIZG1cgga5BGHP54a28wE,63467
201
201
  datahub/ingestion/source/mongodb.py,sha256=vZue4Nz0xaBoCUsQr3_0OIRkWRxeE_IH_Y_QKZ1s7S0,21077
202
202
  datahub/ingestion/source/nifi.py,sha256=ttsjZ9aRUvINmewvKFIQD8Rwa4jcl35WFG-F-jPGPWQ,56146
203
203
  datahub/ingestion/source/openapi.py,sha256=3ea2ORz1cuq4e7L2hSjxG9Cw3__pVoJ5UNYTJS3EnKU,17386
@@ -882,7 +882,7 @@ datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf
882
882
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
883
883
  datahub/sql_parsing/sqlglot_lineage.py,sha256=gUVq3NwZUzQByJs43JZXz8lZf0ZVzVt0FzaW5wZOwK4,47460
884
884
  datahub/sql_parsing/sqlglot_utils.py,sha256=n6yufzEGwSlFeCSU540hEldIuab0q8KGqm9x0vSawkc,14699
885
- datahub/sql_parsing/tool_meta_extractor.py,sha256=7tY4FAClhFcqwc23lGVlnT6Dequ_5Xcpbt0hDvnlLzM,6670
885
+ datahub/sql_parsing/tool_meta_extractor.py,sha256=qEPq8RFWyK0tmSPNlluvd5cxgwbd2v6m9ViSY4hm2QM,6822
886
886
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
887
887
  datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
888
888
  datahub/telemetry/telemetry.py,sha256=gzla-QGNsynGg2FqFxiDDFQ0emG53MJ9lhOA2-UUg-Y,15047
@@ -982,8 +982,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
982
982
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
983
983
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
984
984
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
985
- acryl_datahub-0.15.0.1rc7.dist-info/METADATA,sha256=hl14lRgFU4pk8d2s_Qxx1Xtkbd2TQp6gEek2gpkea1o,173642
986
- acryl_datahub-0.15.0.1rc7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
987
- acryl_datahub-0.15.0.1rc7.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
988
- acryl_datahub-0.15.0.1rc7.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
989
- acryl_datahub-0.15.0.1rc7.dist-info/RECORD,,
985
+ acryl_datahub-0.15.0.1rc8.dist-info/METADATA,sha256=mW2V4Czvd-ZE_mUJX8XkNZxNwnBa-gLJxebl0KWsM2A,173642
986
+ acryl_datahub-0.15.0.1rc8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
987
+ acryl_datahub-0.15.0.1rc8.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
988
+ acryl_datahub-0.15.0.1rc8.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
989
+ acryl_datahub-0.15.0.1rc8.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0.1rc7"
6
+ __version__ = "0.15.0.1rc8"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -5,6 +5,7 @@ import time
5
5
  from dataclasses import dataclass
6
6
  from datetime import datetime, timezone
7
7
  from functools import lru_cache
8
+ from json import JSONDecodeError
8
9
  from typing import Dict, Iterable, List, Optional, Set, Tuple, Union
9
10
 
10
11
  import dateutil.parser as dp
@@ -193,6 +194,9 @@ class HTTPError429(HTTPError):
193
194
  pass
194
195
 
195
196
 
197
+ ModeRequestError = (HTTPError, JSONDecodeError)
198
+
199
+
196
200
  @dataclass
197
201
  class ModeSourceReport(StaleEntityRemovalSourceReport):
198
202
  filtered_spaces: LossyList[str] = dataclasses.field(default_factory=LossyList)
@@ -328,11 +332,11 @@ class ModeSource(StatefulIngestionSourceBase):
328
332
  # Test the connection
329
333
  try:
330
334
  self._get_request_json(f"{self.config.connect_uri}/api/verify")
331
- except HTTPError as http_error:
335
+ except ModeRequestError as e:
332
336
  self.report.report_failure(
333
337
  title="Failed to Connect",
334
338
  message="Unable to verify connection to mode.",
335
- context=f"Error: {str(http_error)}",
339
+ context=f"Error: {str(e)}",
336
340
  )
337
341
 
338
342
  self.workspace_uri = f"{self.config.connect_uri}/api/{self.config.workspace}"
@@ -521,11 +525,11 @@ class ModeSource(StatefulIngestionSourceBase):
521
525
  if self.config.owner_username_instead_of_email
522
526
  else user_json.get("email")
523
527
  )
524
- except HTTPError as http_error:
528
+ except ModeRequestError as e:
525
529
  self.report.report_warning(
526
530
  title="Failed to retrieve Mode creator",
527
531
  message=f"Unable to retrieve user for {href}",
528
- context=f"Reason: {str(http_error)}",
532
+ context=f"Reason: {str(e)}",
529
533
  )
530
534
  return user
531
535
 
@@ -571,11 +575,11 @@ class ModeSource(StatefulIngestionSourceBase):
571
575
  logging.debug(f"Skipping space {space_name} due to space pattern")
572
576
  continue
573
577
  space_info[s.get("token", "")] = s.get("name", "")
574
- except HTTPError as http_error:
578
+ except ModeRequestError as e:
575
579
  self.report.report_failure(
576
580
  title="Failed to Retrieve Spaces",
577
581
  message="Unable to retrieve spaces / collections for workspace.",
578
- context=f"Workspace: {self.workspace_uri}, Error: {str(http_error)}",
582
+ context=f"Workspace: {self.workspace_uri}, Error: {str(e)}",
579
583
  )
580
584
 
581
585
  return space_info
@@ -721,11 +725,11 @@ class ModeSource(StatefulIngestionSourceBase):
721
725
  try:
722
726
  ds_json = self._get_request_json(f"{self.workspace_uri}/data_sources")
723
727
  data_sources = ds_json.get("_embedded", {}).get("data_sources", [])
724
- except HTTPError as http_error:
728
+ except ModeRequestError as e:
725
729
  self.report.report_failure(
726
730
  title="Failed to retrieve Data Sources",
727
731
  message="Unable to retrieve data sources from Mode.",
728
- context=f"Error: {str(http_error)}",
732
+ context=f"Error: {str(e)}",
729
733
  )
730
734
 
731
735
  return data_sources
@@ -812,11 +816,11 @@ class ModeSource(StatefulIngestionSourceBase):
812
816
  if definition.get("name", "") == definition_name:
813
817
  return definition.get("source", "")
814
818
 
815
- except HTTPError as http_error:
819
+ except ModeRequestError as e:
816
820
  self.report.report_failure(
817
821
  title="Failed to Retrieve Definition",
818
822
  message="Unable to retrieve definition from Mode.",
819
- context=f"Definition Name: {definition_name}, Error: {str(http_error)}",
823
+ context=f"Definition Name: {definition_name}, Error: {str(e)}",
820
824
  )
821
825
  return None
822
826
 
@@ -1382,11 +1386,11 @@ class ModeSource(StatefulIngestionSourceBase):
1382
1386
  f"{self.workspace_uri}/spaces/{space_token}/reports"
1383
1387
  )
1384
1388
  reports = reports_json.get("_embedded", {}).get("reports", {})
1385
- except HTTPError as http_error:
1389
+ except ModeRequestError as e:
1386
1390
  self.report.report_failure(
1387
1391
  title="Failed to Retrieve Reports for Space",
1388
1392
  message="Unable to retrieve reports for space token.",
1389
- context=f"Space Token: {space_token}, Error: {str(http_error)}",
1393
+ context=f"Space Token: {space_token}, Error: {str(e)}",
1390
1394
  )
1391
1395
  return reports
1392
1396
 
@@ -1400,11 +1404,11 @@ class ModeSource(StatefulIngestionSourceBase):
1400
1404
  url = f"{self.workspace_uri}/spaces/{space_token}/datasets"
1401
1405
  datasets_json = self._get_request_json(url)
1402
1406
  datasets = datasets_json.get("_embedded", {}).get("reports", [])
1403
- except HTTPError as http_error:
1407
+ except ModeRequestError as e:
1404
1408
  self.report.report_failure(
1405
1409
  title="Failed to Retrieve Datasets for Space",
1406
1410
  message=f"Unable to retrieve datasets for space token {space_token}.",
1407
- context=f"Error: {str(http_error)}",
1411
+ context=f"Error: {str(e)}",
1408
1412
  )
1409
1413
  return datasets
1410
1414
 
@@ -1416,11 +1420,11 @@ class ModeSource(StatefulIngestionSourceBase):
1416
1420
  f"{self.workspace_uri}/reports/{report_token}/queries"
1417
1421
  )
1418
1422
  queries = queries_json.get("_embedded", {}).get("queries", {})
1419
- except HTTPError as http_error:
1423
+ except ModeRequestError as e:
1420
1424
  self.report.report_failure(
1421
1425
  title="Failed to Retrieve Queries",
1422
1426
  message="Unable to retrieve queries for report token.",
1423
- context=f"Report Token: {report_token}, Error: {str(http_error)}",
1427
+ context=f"Report Token: {report_token}, Error: {str(e)}",
1424
1428
  )
1425
1429
  return queries
1426
1430
 
@@ -1433,11 +1437,11 @@ class ModeSource(StatefulIngestionSourceBase):
1433
1437
  f"{self.workspace_uri}/reports/{report_token}/runs/{report_run_id}/query_runs{query_run_id}"
1434
1438
  )
1435
1439
  queries = queries_json.get("_embedded", {}).get("queries", {})
1436
- except HTTPError as http_error:
1440
+ except ModeRequestError as e:
1437
1441
  self.report.report_failure(
1438
1442
  title="Failed to Retrieve Queries for Report",
1439
1443
  message="Unable to retrieve queries for report token.",
1440
- context=f"Report Token:{report_token}, Error: {str(http_error)}",
1444
+ context=f"Report Token:{report_token}, Error: {str(e)}",
1441
1445
  )
1442
1446
  return {}
1443
1447
  return queries
@@ -1451,13 +1455,13 @@ class ModeSource(StatefulIngestionSourceBase):
1451
1455
  f"/queries/{query_token}/charts"
1452
1456
  )
1453
1457
  charts = charts_json.get("_embedded", {}).get("charts", {})
1454
- except HTTPError as http_error:
1458
+ except ModeRequestError as e:
1455
1459
  self.report.report_failure(
1456
1460
  title="Failed to Retrieve Charts",
1457
1461
  message="Unable to retrieve charts from Mode.",
1458
1462
  context=f"Report Token: {report_token}, "
1459
1463
  f"Query token: {query_token}, "
1460
- f"Error: {str(http_error)}",
1464
+ f"Error: {str(e)}",
1461
1465
  )
1462
1466
  return charts
1463
1467
 
@@ -1477,6 +1481,8 @@ class ModeSource(StatefulIngestionSourceBase):
1477
1481
  response = self.session.get(
1478
1482
  url, timeout=self.config.api_options.timeout
1479
1483
  )
1484
+ if response.status_code == 204: # No content, don't parse json
1485
+ return {}
1480
1486
  return response.json()
1481
1487
  except HTTPError as http_error:
1482
1488
  error_response = http_error.response
@@ -40,6 +40,7 @@ def _get_last_line(query: str) -> str:
40
40
  class ToolMetaExtractorReport(Report):
41
41
  num_queries_meta_extracted: Dict[str, int] = field(default_factory=int_top_k_dict)
42
42
  failures: List[str] = field(default_factory=list)
43
+ looker_user_mapping_missing: Optional[bool] = None
43
44
 
44
45
 
45
46
  class ToolMetaExtractor:
@@ -108,7 +109,9 @@ class ToolMetaExtractor:
108
109
  PlatformResource.search_by_filters(query=query, graph_client=graph)
109
110
  )
110
111
 
111
- if len(platform_resources) > 1:
112
+ if len(platform_resources) == 0:
113
+ report.looker_user_mapping_missing = True
114
+ elif len(platform_resources) > 1:
112
115
  report.failures.append(
113
116
  "Looker user metadata extraction failed. Found more than one looker user id mappings."
114
117
  )