acryl-datahub 0.15.0rc9__py3-none-any.whl → 0.15.0rc10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=-XVob0PhZLue6g_4duveVmhOndcfh_8ifCuVB_YBxkk,574
1
+ datahub/__init__.py,sha256=vEQvZGPQ7hfoL7fm6XTBo6OY37wlMIITNNW1rpZuQCk,575
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -331,7 +331,7 @@ datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKf
331
331
  datahub/ingestion/source/looker/looker_dataclasses.py,sha256=ULWLFWsV2cKmTuOFavD8QjEBmnXmvjyr8RbUB62DwJQ,12178
332
332
  datahub/ingestion/source/looker/looker_file_loader.py,sha256=c1ewDrIb9VJg1o-asbwX9gL83kgL01vIETzzbmZIhmw,4267
333
333
  datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=QTTCW-rPNUoazQG_sTJbCARXJzQ7NKS-XKURp2AAWls,11106
334
- datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=GxK-NkP0Evzv_se3l5f-z-HZZNQRjYr9py_h0T6ReHY,2902
334
+ datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=mO4G4MNA4YZFvZaDBpdiJ2vP3irC82kY34RdaK4Pbfs,3100
335
335
  datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
336
336
  datahub/ingestion/source/looker/looker_source.py,sha256=AByQxWVfOBqOtZPaR_cw9SB-tFZtfppiKRkFSbcK1GA,65346
337
337
  datahub/ingestion/source/looker/looker_template_language.py,sha256=EG4ZfVZ0x53lgaYh2ohzL4ZCy9KsX0TA51XqCmsCd2Q,14328
@@ -485,7 +485,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
485
485
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
486
486
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
487
487
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
488
- datahub/ingestion/source/tableau/tableau.py,sha256=LgsVZPMRiJiyyB9-ljj53T8WGUyBSiQwByW_1TSuxC4,130417
488
+ datahub/ingestion/source/tableau/tableau.py,sha256=AFlDng8EfvBvZL692hMf_sfzGwpHpUU6FW_ElR4uitQ,131551
489
489
  datahub/ingestion/source/tableau/tableau_common.py,sha256=Dy_2pvkPucZJsG_LvQZLlxNEkjh-yOXHlZ4jurq9opM,26069
490
490
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=nWElhtDo5kj5mWivZFmtVF_4Ugw0-EatBYWyDVzu5hE,2501
491
491
  datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
974
974
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
975
975
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
976
976
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
977
- acryl_datahub-0.15.0rc9.dist-info/METADATA,sha256=lTHIKVVIg5N9xzphcWZ_fhOb0yL1MEx4lRidtr2GvJU,172484
978
- acryl_datahub-0.15.0rc9.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
979
- acryl_datahub-0.15.0rc9.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
980
- acryl_datahub-0.15.0rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
981
- acryl_datahub-0.15.0rc9.dist-info/RECORD,,
977
+ acryl_datahub-0.15.0rc10.dist-info/METADATA,sha256=oHQM6bvFxv1QboFDXeahyZEbfacayYoFOlOUcPacaeI,172487
978
+ acryl_datahub-0.15.0rc10.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
979
+ acryl_datahub-0.15.0rc10.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
980
+ acryl_datahub-0.15.0rc10.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
981
+ acryl_datahub-0.15.0rc10.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0rc9"
6
+ __version__ = "0.15.0rc10"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -4,6 +4,7 @@ from typing import ClassVar, Optional, TextIO
4
4
  from liquid import Environment
5
5
  from liquid.ast import Node
6
6
  from liquid.context import Context
7
+ from liquid.filter import string_filter
7
8
  from liquid.parse import expect, get_parser
8
9
  from liquid.stream import TokenStream
9
10
  from liquid.tag import Tag
@@ -81,12 +82,18 @@ class ConditionTag(Tag):
81
82
  custom_tags = [ConditionTag]
82
83
 
83
84
 
85
+ @string_filter
86
+ def sql_quote_filter(variable: str) -> str:
87
+ return f"'{variable}'"
88
+
89
+
84
90
  @lru_cache(maxsize=1)
85
91
  def _create_env() -> Environment:
86
- env: Environment = Environment()
92
+ env: Environment = Environment(strict_filters=False)
87
93
  # register tag. One time activity
88
94
  for custom_tag in custom_tags:
89
95
  env.add_tag(custom_tag)
96
+ env.add_filter("sql_quote", sql_quote_filter)
90
97
  return env
91
98
 
92
99
 
@@ -68,6 +68,7 @@ from datahub.ingestion.api.source import (
68
68
  CapabilityReport,
69
69
  MetadataWorkUnitProcessor,
70
70
  Source,
71
+ StructuredLogLevel,
71
72
  TestableSource,
72
73
  TestConnectionReport,
73
74
  )
@@ -289,16 +290,12 @@ class TableauConnectionConfig(ConfigModel):
289
290
  server.auth.sign_in(authentication)
290
291
  return server
291
292
  except ServerResponseError as e:
293
+ message = f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}"
292
294
  if isinstance(authentication, PersonalAccessTokenAuth):
293
295
  # Docs on token expiry in Tableau:
294
296
  # https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm#token-expiry
295
- logger.info(
296
- "Error authenticating with Tableau. Note that Tableau personal access tokens "
297
- "expire if not used for 15 days or if over 1 year old"
298
- )
299
- raise ValueError(
300
- f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}"
301
- ) from e
297
+ message = f"Error authenticating with Tableau. Note that Tableau personal access tokens expire if not used for 15 days or if over 1 year old: {str(e)}"
298
+ raise ValueError(message) from e
302
299
  except Exception as e:
303
300
  raise ValueError(
304
301
  f"Unable to login (check your Tableau connection and credentials): {str(e)}"
@@ -700,6 +697,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
700
697
  config=self.config,
701
698
  ctx=self.ctx,
702
699
  site=site,
700
+ site_id=site.id,
703
701
  report=self.report,
704
702
  server=self.server,
705
703
  platform=self.platform,
@@ -707,11 +705,19 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
707
705
  logger.info(f"Ingesting assets of site '{site.content_url}'.")
708
706
  yield from site_source.ingest_tableau_site()
709
707
  else:
710
- site = self.server.sites.get_by_id(self.server.site_id)
708
+ site = None
709
+ with self.report.report_exc(
710
+ title="Unable to fetch site details. Site hierarchy may be incomplete and external urls may be missing.",
711
+ message="This usually indicates missing permissions. Ensure that you have all necessary permissions.",
712
+ level=StructuredLogLevel.WARN,
713
+ ):
714
+ site = self.server.sites.get_by_id(self.server.site_id)
715
+
711
716
  site_source = TableauSiteSource(
712
717
  config=self.config,
713
718
  ctx=self.ctx,
714
719
  site=site,
720
+ site_id=self.server.site_id,
715
721
  report=self.report,
716
722
  server=self.server,
717
723
  platform=self.platform,
@@ -722,6 +728,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
722
728
  title="Failed to Retrieve Tableau Metadata",
723
729
  message="Unable to retrieve metadata from tableau.",
724
730
  context=str(md_exception),
731
+ exc=md_exception,
725
732
  )
726
733
 
727
734
  def close(self) -> None:
@@ -743,7 +750,8 @@ class TableauSiteSource:
743
750
  self,
744
751
  config: TableauConfig,
745
752
  ctx: PipelineContext,
746
- site: SiteItem,
753
+ site: Optional[SiteItem],
754
+ site_id: Optional[str],
747
755
  report: TableauSourceReport,
748
756
  server: Server,
749
757
  platform: str,
@@ -752,9 +760,16 @@ class TableauSiteSource:
752
760
  self.report = report
753
761
  self.server: Server = server
754
762
  self.ctx: PipelineContext = ctx
755
- self.site: SiteItem = site
756
763
  self.platform = platform
757
764
 
765
+ self.site: Optional[SiteItem] = site
766
+ if site_id is not None:
767
+ self.site_id: str = site_id
768
+ else:
769
+ assert self.site is not None, "site or site_id is required"
770
+ assert self.site.id is not None, "site_id is required when site is provided"
771
+ self.site_id = self.site.id
772
+
758
773
  self.database_tables: Dict[str, DatabaseTable] = {}
759
774
  self.tableau_stat_registry: Dict[str, UsageStat] = {}
760
775
  self.tableau_project_registry: Dict[str, TableauProject] = {}
@@ -808,7 +823,7 @@ class TableauSiteSource:
808
823
  def _re_authenticate(self):
809
824
  tableau_auth: Union[
810
825
  TableauAuth, PersonalAccessTokenAuth
811
- ] = self.config.get_tableau_auth(self.site.content_url)
826
+ ] = self.config.get_tableau_auth(self.site_id)
812
827
  self.server.auth.sign_in(tableau_auth)
813
828
 
814
829
  @property
@@ -826,6 +841,7 @@ class TableauSiteSource:
826
841
  if not view.id:
827
842
  continue
828
843
  self.tableau_stat_registry[view.id] = UsageStat(view_count=view.total_views)
844
+ logger.info(f"Got Tableau stats for {len(self.tableau_stat_registry)} assets")
829
845
  logger.debug("Tableau stats %s", self.tableau_stat_registry)
830
846
 
831
847
  def _populate_database_server_hostname_map(self) -> None:
@@ -876,7 +892,7 @@ class TableauSiteSource:
876
892
  ancestors = [cur_proj.name]
877
893
  while cur_proj.parent_id is not None:
878
894
  if cur_proj.parent_id not in all_project_map:
879
- self.report.report_warning(
895
+ self.report.warning(
880
896
  "project-issue",
881
897
  f"Parent project {cur_proj.parent_id} not found. We need Site Administrator Explorer permissions.",
882
898
  )
@@ -974,8 +990,11 @@ class TableauSiteSource:
974
990
  self.datasource_project_map[ds.id] = ds.project_id
975
991
  except Exception as e:
976
992
  self.report.get_all_datasources_query_failed = True
977
- logger.info(f"Get all datasources query failed due to error {e}")
978
- logger.debug("Error stack trace", exc_info=True)
993
+ self.report.warning(
994
+ title="Unexpected Query Error",
995
+ message="Get all datasources query failed due to error",
996
+ exc=e,
997
+ )
979
998
 
980
999
  def _init_workbook_registry(self) -> None:
981
1000
  if self.server is None:
@@ -1141,7 +1160,6 @@ class TableauSiteSource:
1141
1160
  )
1142
1161
 
1143
1162
  if node_limit_errors:
1144
- logger.debug(f"Node Limit Error. query_data {query_data}")
1145
1163
  self.report.warning(
1146
1164
  title="Tableau Data Exceed Predefined Limit",
1147
1165
  message="The numbers of record in result set exceeds a predefined limit. Increase the tableau "
@@ -1257,9 +1275,10 @@ class TableauSiteSource:
1257
1275
  wrk_id: Optional[str] = workbook.get(c.ID)
1258
1276
  prj_name: Optional[str] = workbook.get(c.PROJECT_NAME)
1259
1277
 
1260
- logger.debug(
1261
- f"Skipping workbook {wrk_name}({wrk_id}) as it is project {prj_name}({project_luid}) not "
1262
- f"present in project registry"
1278
+ self.report.warning(
1279
+ title="Skipping Missing Workbook",
1280
+ message="Skipping workbook as its project is not present in project registry",
1281
+ context=f"workbook={wrk_name}({wrk_id}), project={prj_name}({project_luid})",
1263
1282
  )
1264
1283
  continue
1265
1284
 
@@ -1453,7 +1472,7 @@ class TableauSiteSource:
1453
1472
  c.COLUMNS_CONNECTION
1454
1473
  ].get("totalCount")
1455
1474
  if not is_custom_sql and not num_tbl_cols:
1456
- logger.debug(
1475
+ logger.warning(
1457
1476
  f"Skipping upstream table with id {table[c.ID]}, no columns: {table}"
1458
1477
  )
1459
1478
  continue
@@ -1469,7 +1488,12 @@ class TableauSiteSource:
1469
1488
  table, default_schema_map=self.config.default_schema_map
1470
1489
  )
1471
1490
  except Exception as e:
1472
- logger.info(f"Failed to generate upstream reference for {table}: {e}")
1491
+ self.report.warning(
1492
+ title="Potentially Missing Lineage Issue",
1493
+ message="Failed to generate upstream reference",
1494
+ exc=e,
1495
+ context=f"table={table}",
1496
+ )
1473
1497
  continue
1474
1498
 
1475
1499
  table_urn = ref.make_dataset_urn(
@@ -1917,10 +1941,12 @@ class TableauSiteSource:
1917
1941
  self.datasource_project_map[ds_result.id] = ds_result.project_id
1918
1942
  except Exception as e:
1919
1943
  self.report.num_get_datasource_query_failures += 1
1920
- logger.warning(
1921
- f"Failed to get datasource project_luid for {ds_luid} due to error {e}"
1944
+ self.report.warning(
1945
+ title="Unexpected Query Error",
1946
+ message="Failed to get datasource details",
1947
+ exc=e,
1948
+ context=f"ds_luid={ds_luid}",
1922
1949
  )
1923
- logger.debug("Error stack trace", exc_info=True)
1924
1950
 
1925
1951
  def _get_workbook_project_luid(self, wb: dict) -> Optional[str]:
1926
1952
  if wb.get(c.LUID) and self.workbook_project_map.get(wb[c.LUID]):
@@ -3181,10 +3207,10 @@ class TableauSiteSource:
3181
3207
  else:
3182
3208
  # This is a root Tableau project since the parent_project_id is None.
3183
3209
  # For a root project, either the site is the parent, or the platform is the default parent.
3184
- if self.config.add_site_container and self.site and self.site.id:
3210
+ if self.config.add_site_container:
3185
3211
  # The site containers have already been generated by emit_site_container, so we
3186
3212
  # don't need to emit them again here.
3187
- parent_project_key = self.gen_site_key(self.site.id)
3213
+ parent_project_key = self.gen_site_key(self.site_id)
3188
3214
 
3189
3215
  yield from gen_containers(
3190
3216
  container_key=project_key,
@@ -3201,12 +3227,12 @@ class TableauSiteSource:
3201
3227
  yield from emit_project_in_topological_order(project)
3202
3228
 
3203
3229
  def emit_site_container(self):
3204
- if not self.site or not self.site.id:
3230
+ if not self.site:
3205
3231
  logger.warning("Can not ingest site container. No site information found.")
3206
3232
  return
3207
3233
 
3208
3234
  yield from gen_containers(
3209
- container_key=self.gen_site_key(self.site.id),
3235
+ container_key=self.gen_site_key(self.site_id),
3210
3236
  name=self.site.name or "Default",
3211
3237
  sub_types=[c.SITE],
3212
3238
  )